Merge tag 'usb-5.13-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/usb

author Linus Torvalds <torvalds@linux-foundation.org>

Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)
diff --git a/.mailmap b/.mailmap

index 3e2bff9..ce6c497 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -160,6 +160,7 @@ Jeff Layton <jlayton@kernel.org> <jlayton@primarydata.com>
  Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
  Jens Axboe <axboe@suse.de>
  Jens Osterkamp <Jens.Osterkamp@de.ibm.com>
+Jernej Skrabec <jernej.skrabec@gmail.com> <jernej.skrabec@siol.net>
  Jiri Slaby <jirislaby@kernel.org> <jirislaby@gmail.com>
  Jiri Slaby <jirislaby@kernel.org> <jslaby@novell.com>
  Jiri Slaby <jirislaby@kernel.org> <jslaby@suse.com>
diff --git a/Documentation/ABI/obsolete/sysfs-class-dax b/Documentation/ABI/obsolete/sysfs-class-dax

index 0faf135..5bcce27 100644 (file)
--- a/Documentation/ABI/obsolete/sysfs-class-dax
+++ b/Documentation/ABI/obsolete/sysfs-class-dax
@@ -1,7 +1,7 @@
  What:           /sys/class/dax/
  Date:           May, 2016
  KernelVersion:  v4.7
-Contact:        linux-nvdimm@lists.01.org
+Contact:        nvdimm@lists.linux.dev
  Description:   Device DAX is the device-centric analogue of Filesystem
                 DAX (CONFIG_FS_DAX).  It allows memory ranges to be
                 allocated and mapped without need of an intervening file
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered

index 0360be3..dae880b 100644 (file)
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.
  
  What:          /sys/kernel/fadump_registered
  Date:          Feb 2012
diff --git a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem

index 6ce0b12..ca2396e 100644 (file)
--- a/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
+++ b/Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.
  
  What:          /sys/kernel/fadump_release_mem
  Date:          Feb 2012
diff --git a/Documentation/ABI/removed/sysfs-bus-nfit b/Documentation/ABI/removed/sysfs-bus-nfit

index ae8c1ca..2774370 100644 (file)
--- a/Documentation/ABI/removed/sysfs-bus-nfit
+++ b/Documentation/ABI/removed/sysfs-bus-nfit
@@ -1,7 +1,7 @@
  What:          /sys/bus/nd/devices/regionX/nfit/ecc_unit_size
  Date:          Aug, 2017
  KernelVersion: v4.14 (Removed v4.18)
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Size of a write request to a DIMM that will not incur a
                 read-modify-write cycle at the memory controller.
diff --git a/Documentation/ABI/testing/sysfs-bus-nfit b/Documentation/ABI/testing/sysfs-bus-nfit

index 63ef0b9..e7282d1 100644 (file)
--- a/Documentation/ABI/testing/sysfs-bus-nfit
+++ b/Documentation/ABI/testing/sysfs-bus-nfit
@@ -5,7 +5,7 @@ Interface Table (NFIT)' section in the ACPI specification
  What:          /sys/bus/nd/devices/nmemX/nfit/serial
  Date:          Jun, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Serial number of the NVDIMM (non-volatile dual in-line
                 memory module), assigned by the module vendor.
@@ -14,7 +14,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/handle
  Date:          Apr, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) The address (given by the _ADR object) of the device on its
                 parent bus of the NVDIMM device containing the NVDIMM region.
@@ -23,7 +23,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/device
  Date:          Apr, 2015
  KernelVersion: v4.1
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Device id for the NVDIMM, assigned by the module vendor.
  
@@ -31,7 +31,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/rev_id
  Date:          Jun, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Revision of the NVDIMM, assigned by the module vendor.
  
@@ -39,7 +39,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/phys_id
  Date:          Apr, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Handle (i.e., instance number) for the SMBIOS (system
                 management BIOS) Memory Device structure describing the NVDIMM
@@ -49,7 +49,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/flags
  Date:          Jun, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) The flags in the NFIT memory device sub-structure indicate
                 the state of the data on the nvdimm relative to its energy
@@ -68,7 +68,7 @@ What:         /sys/bus/nd/devices/nmemX/nfit/format1
  What:          /sys/bus/nd/devices/nmemX/nfit/formats
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) The interface codes indicate support for persistent memory
                 mapped directly into system physical address space and / or a
@@ -84,7 +84,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/vendor
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Vendor id of the NVDIMM.
  
@@ -92,7 +92,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/dsm_mask
  Date:          May, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) The bitmask indicates the supported device specific control
                 functions relative to the NVDIMM command family supported by the
@@ -102,7 +102,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/family
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Displays the NVDIMM family command sets. Values
                 0, 1, 2 and 3 correspond to NVDIMM_FAMILY_INTEL,
@@ -118,7 +118,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/id
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) ACPI specification 6.2 section 5.2.25.9, defines an
                 identifier for an NVDIMM, which reflects the id attribute.
@@ -127,7 +127,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/subsystem_vendor
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Sub-system vendor id of the NVDIMM non-volatile memory
                 subsystem controller.
@@ -136,7 +136,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/subsystem_rev_id
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Sub-system revision id of the NVDIMM non-volatile memory subsystem
                 controller, assigned by the non-volatile memory subsystem
@@ -146,7 +146,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/nfit/subsystem_device
  Date:          Apr, 2016
  KernelVersion: v4.7
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) Sub-system device id for the NVDIMM non-volatile memory
                 subsystem controller, assigned by the non-volatile memory
@@ -156,7 +156,7 @@ Description:
  What:          /sys/bus/nd/devices/ndbusX/nfit/revision
  Date:          Jun, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) ACPI NFIT table revision number.
  
@@ -164,7 +164,7 @@ Description:
  What:          /sys/bus/nd/devices/ndbusX/nfit/scrub
  Date:          Sep, 2016
  KernelVersion: v4.9
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RW) This shows the number of full Address Range Scrubs (ARS)
                 that have been completed since driver load time. Userspace can
@@ -177,7 +177,7 @@ Description:
  What:          /sys/bus/nd/devices/ndbusX/nfit/hw_error_scrub
  Date:          Sep, 2016
  KernelVersion: v4.9
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RW) Provides a way to toggle the behavior between just adding
                 the address (cache line) where the MCE happened to the poison
@@ -196,7 +196,7 @@ Description:
  What:          /sys/bus/nd/devices/ndbusX/nfit/dsm_mask
  Date:          Jun, 2017
  KernelVersion: v4.13
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) The bitmask indicates the supported bus specific control
                 functions. See the section named 'NVDIMM Root Device _DSMs' in
@@ -205,7 +205,7 @@ Description:
  What:          /sys/bus/nd/devices/ndbusX/nfit/firmware_activate_noidle
  Date:          Apr, 2020
  KernelVersion: v5.8
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RW) The Intel platform implementation of firmware activate
                 support exposes an option to let the platform force idle devices in
@@ -225,7 +225,7 @@ Description:
  What:          /sys/bus/nd/devices/regionX/nfit/range_index
  Date:          Jun, 2015
  KernelVersion: v4.2
-Contact:       linux-nvdimm@lists.01.org
+Contact:       nvdimm@lists.linux.dev
  Description:
                 (RO) A unique number provided by the BIOS to identify an address
                 range. Used by NVDIMM Region Mapping Structure to uniquely refer
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem

index 8316c33..92e2db0 100644 (file)
--- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -1,7 +1,7 @@
  What:          /sys/bus/nd/devices/nmemX/papr/flags
  Date:          Apr, 2020
  KernelVersion: v5.8
-Contact:       linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:       linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
  Description:
                 (RO) Report flags indicating various states of a
                 papr-pmem NVDIMM device. Each flag maps to a one or
@@ -36,7 +36,7 @@ Description:
  What:          /sys/bus/nd/devices/nmemX/papr/perf_stats
  Date:          May, 2020
  KernelVersion: v5.9
-Contact:       linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Contact:       linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, nvdimm@lists.linux.dev,
  Description:
                 (RO) Report various performance stats related to papr-scm NVDIMM
                 device.  Each stat is reported on a new line with each line
diff --git a/Documentation/ABI/testing/sysfs-module b/Documentation/ABI/testing/sysfs-module

index a485434..88bddf1 100644 (file)
--- a/Documentation/ABI/testing/sysfs-module
+++ b/Documentation/ABI/testing/sysfs-module
@@ -37,13 +37,13 @@ Description:        Maximum time allowed for periodic transfers per microframe (μs)
  
  What:          /sys/module/*/{coresize,initsize}
  Date:          Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
  Contact:       Kay Sievers <kay.sievers@vrfy.org>
  Description:   Module size in bytes.
  
  What:          /sys/module/*/taint
  Date:          Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
  Contact:       Kay Sievers <kay.sievers@vrfy.org>
  Description:   Module taint flags:
                         ==  =====================
diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst

index 1d56a6b..7ca8df5 100644 (file)
--- a/Documentation/admin-guide/sysctl/kernel.rst
+++ b/Documentation/admin-guide/sysctl/kernel.rst
@@ -483,10 +483,11 @@ modprobe
  ========
  
  The full path to the usermode helper for autoloading kernel modules,
-by default "/sbin/modprobe".  This binary is executed when the kernel
-requests a module.  For example, if userspace passes an unknown
-filesystem type to mount(), then the kernel will automatically request
-the corresponding filesystem module by executing this usermode helper.
+by default ``CONFIG_MODPROBE_PATH``, which in turn defaults to
+"/sbin/modprobe".  This binary is executed when the kernel requests a
+module.  For example, if userspace passes an unknown filesystem type
+to mount(), then the kernel will automatically request the
+corresponding filesystem module by executing this usermode helper.
  This usermode helper should insert the needed module into the kernel.
  
  This sysctl only affects module autoloading.  It has no effect on the
diff --git a/Documentation/block/data-integrity.rst b/Documentation/block/data-integrity.rst

index 4f2452a..07a97aa 100644 (file)
--- a/Documentation/block/data-integrity.rst
+++ b/Documentation/block/data-integrity.rst
@@ -1,4 +1,4 @@
-==============
+==============
  Data Integrity
  ==============
  
diff --git a/Documentation/cdrom/cdrom-standard.rst b/Documentation/cdrom/cdrom-standard.rst

index 70500b1..5845960 100644 (file)
--- a/Documentation/cdrom/cdrom-standard.rst
+++ b/Documentation/cdrom/cdrom-standard.rst
@@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general
  *struct file_operations*::
  
         struct file_operations cdrom_fops = {
-               NULL,                   /∗ lseek ∗/
-               block _read ,           /∗ read—general block-dev read ∗/
-               block _write,           /∗ write—general block-dev write ∗/
-               NULL,                   /∗ readdir ∗/
-               NULL,                   /∗ select ∗/
-               cdrom_ioctl,            /∗ ioctl ∗/
-               NULL,                   /∗ mmap ∗/
-               cdrom_open,             /∗ open ∗/
-               cdrom_release,          /∗ release ∗/
-               NULL,                   /∗ fsync ∗/
-               NULL,                   /∗ fasync ∗/
-               NULL                    /∗ revalidate ∗/
+               NULL,                   /* lseek */
+               block _read ,           /* read--general block-dev read */
+               block _write,           /* write--general block-dev write */
+               NULL,                   /* readdir */
+               NULL,                   /* select */
+               cdrom_ioctl,            /* ioctl */
+               NULL,                   /* mmap */
+               cdrom_open,             /* open */
+               cdrom_release,          /* release */
+               NULL,                   /* fsync */
+               NULL,                   /* fasync */
+               NULL                    /* revalidate */
         };
  
  Every active CD-ROM device shares this *struct*. The routines
@@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with
  `cdrom.c`, currently contains the following fields::
  
    struct cdrom_device_info {
-       const struct cdrom_device_ops * ops;    /* device operations for this major */
+       const struct cdrom_device_ops * ops;    /* device operations for this major */
         struct list_head list;                  /* linked list of all device_info */
         struct gendisk * disk;                  /* matching block layer disk */
         void *  handle;                         /* driver-dependent data */
  
-       int mask;                               /* mask of capability: disables them */
+       int mask;                               /* mask of capability: disables them */
         int speed;                              /* maximum speed for reading data */
         int capacity;                           /* number of discs in a jukebox */
  
@@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*.
  
  In the file `cdrom.c` you will encounter many constructions of the type::
  
-       if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+       if (cdo->capability & ~cdi->mask & CDC _<capability>) ...
  
  There is no *ioctl* to set the mask... The reason is that
  I think it is better to control the **behavior** rather than the
diff --git a/Documentation/driver-api/nvdimm/nvdimm.rst b/Documentation/driver-api/nvdimm/nvdimm.rst

index ef6d59e..1d8302b 100644 (file)
--- a/Documentation/driver-api/nvdimm/nvdimm.rst
+++ b/Documentation/driver-api/nvdimm/nvdimm.rst
@@ -4,7 +4,7 @@ LIBNVDIMM: Non-Volatile Devices
  
  libnvdimm - kernel / libndctl - userspace helper library
  
-linux-nvdimm@lists.01.org
+nvdimm@lists.linux.dev
  
  Version 13
  
diff --git a/Documentation/driver-api/serial/index.rst b/Documentation/driver-api/serial/index.rst

index 21351b8..8f7d7af 100644 (file)
--- a/Documentation/driver-api/serial/index.rst
+++ b/Documentation/driver-api/serial/index.rst
@@ -19,7 +19,6 @@ Serial drivers
  
      moxa-smartio
      n_gsm
-    rocket
      serial-iso7816
      serial-rs485
  
diff --git a/Documentation/filesystems/erofs.rst b/Documentation/filesystems/erofs.rst

index bf14517..832839f 100644 (file)
--- a/Documentation/filesystems/erofs.rst
+++ b/Documentation/filesystems/erofs.rst
@@ -50,8 +50,8 @@ Here is the main features of EROFS:
  
   - Support POSIX.1e ACLs by using xattrs;
  
- - Support transparent file compression as an option:
-   LZ4 algorithm with 4 KB fixed-sized output compression for high performance.
+ - Support transparent data compression as an option:
+   LZ4 algorithm with the fixed-sized output compression for high performance.
  
  The following git tree provides the file system user-space tools under
  development (ex, formatting tool mkfs.erofs):
@@ -113,31 +113,31 @@ may not. All metadatas can be now observed in two different spaces (views):
  
      ::
  
-                                   |-> aligned with 8B
-                                           |-> followed closely
-       + meta_blkaddr blocks                                      |-> another slot
-       _____________________________________________________________________
-       |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
-       |________|_______|(optional)|(optional)|__(optional)_|_____|__________
-               |-> aligned with the inode slot size
-                   .                   .
-                   .                         .
-               .                              .
-               .                                    .
-           .                                         .
-           .                                              .
-       .____________________________________________________|-> aligned with 4B
-       | xattr_ibody_header | shared xattrs | inline xattrs |
-       |____________________|_______________|_______________|
-       |->    12 bytes    <-|->x * 4 bytes<-|               .
-                           .                .                 .
-                       .                      .                   .
-               .                           .                     .
-           ._______________________________.______________________.
-           | id | id | id | id |  ... | id | ent | ... | ent| ... |
-           |____|____|____|____|______|____|_____|_____|____|_____|
-                                           |-> aligned with 4B
-                                                       |-> aligned with 4B
+                                 |-> aligned with 8B
+                                            |-> followed closely
+     + meta_blkaddr blocks                                      |-> another slot
+       _____________________________________________________________________
+     |  ...   | inode |  xattrs  | extents  | data inline | ... | inode ...
+     |________|_______|(optional)|(optional)|__(optional)_|_____|__________
+              |-> aligned with the inode slot size
+                   .                   .
+                 .                         .
+               .                              .
+             .                                    .
+           .                                         .
+         .                                              .
+       .____________________________________________________|-> aligned with 4B
+       | xattr_ibody_header | shared xattrs | inline xattrs |
+       |____________________|_______________|_______________|
+       |->    12 bytes    <-|->x * 4 bytes<-|               .
+                           .                .                 .
+                     .                      .                   .
+                .                           .                     .
+            ._______________________________.______________________.
+            | id | id | id | id |  ... | id | ent | ... | ent| ... |
+            |____|____|____|____|______|____|_____|_____|____|_____|
+                                            |-> aligned with 4B
+                                                        |-> aligned with 4B
  
      Inode could be 32 or 64 bytes, which can be distinguished from a common
      field which all inode versions have -- i_format::
@@ -175,13 +175,13 @@ may not. All metadatas can be now observed in two different spaces (views):
      Each share xattr can also be directly found by the following formula:
           xattr offset = xattr_blkaddr * block_size + 4 * xattr_id
  
-    ::
+::
  
-                           |-> aligned by  4 bytes
-       + xattr_blkaddr blocks                     |-> aligned with 4 bytes
-       _________________________________________________________________________
-       |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
-       |________|_____________|_____________|_____|______________|_______________
+                           |-> aligned by  4 bytes
+    + xattr_blkaddr blocks                     |-> aligned with 4 bytes
+     _________________________________________________________________________
+    |  ...   | xattr_entry |  xattr data | ... |  xattr_entry | xattr data  ...
+    |________|_____________|_____________|_____|______________|_______________
  
  Directories
  -----------
@@ -193,48 +193,77 @@ algorithm (could refer to the related source code).
  
  ::
  
-                   ___________________________
-                   /                           |
-               /              ______________|________________
-               /              /              | nameoff1       | nameoffN-1
-    ____________.______________._______________v________________v__________
-    | dirent | dirent | ... | dirent | filename | filename | ... | filename |
-    |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
-       \                           ^
-       \                          |                           * could have
-       \                         |                             trailing '\0'
-           \________________________| nameoff0
-
-                               Directory block
+                  ___________________________
+                 /                           |
+                /              ______________|________________
+               /              /              | nameoff1       | nameoffN-1
+  ____________.______________._______________v________________v__________
+ | dirent | dirent | ... | dirent | filename | filename | ... | filename |
+ |___.0___|____1___|_____|___N-1__|____0_____|____1_____|_____|___N-1____|
+      \                           ^
+       \                          |                           * could have
+        \                         |                             trailing '\0'
+         \________________________| nameoff0
+                             Directory block
  
  Note that apart from the offset of the first filename, nameoff0 also indicates
  the total number of directory entries in this block since there is no need to
  introduce another on-disk field at all.
  
-Compression
------------
-Currently, EROFS supports 4KB fixed-sized output transparent file compression,
-as illustrated below::
-
-           |---- Variant-Length Extent ----|-------- VLE --------|----- VLE -----
-           clusterofs                      clusterofs            clusterofs
-           |                               |                     |   logical data
-    _________v_______________________________v_____________________v_______________
-    ... |    .        |             |        .    |             |  .          | ...
-    ____|____.________|_____________|________.____|_____________|__.__________|____
-       |-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|-> cluster <-|
-           size          size          size          size          size
-           .                             .                .                   .
-           .                       .               .                  .
-               .                  .              .                .
-       _______._____________._____________._____________._____________________
-           ... |             |             |             | ... physical data
-       _______|_____________|_____________|_____________|_____________________
-               |-> cluster <-|-> cluster <-|-> cluster <-|
-                   size          size          size
-
-Currently each on-disk physical cluster can contain 4KB (un)compressed data
-at most. For each logical cluster, there is a corresponding on-disk index to
-describe its cluster type, physical cluster address, etc.
-
-See "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+Data compression
+----------------
+EROFS implements LZ4 fixed-sized output compression which generates fixed-sized
+compressed data blocks from variable-sized input in contrast to other existing
+fixed-sized input solutions. Relatively higher compression ratios can be achieved
+by using fixed-sized output compression, since today's popular data compression
+algorithms are mostly LZ77-based and such a fixed-sized output approach can
+benefit from the historical dictionary (a.k.a. the sliding window).
+
+In detail, original (uncompressed) data is turned into several variable-sized
+extents and, in the meantime, compressed into physical clusters (pclusters).
+In order to record each variable-sized extent, logical clusters (lclusters) are
+introduced as the basic unit of compress indexes to indicate whether a new
+extent is generated within the range (HEAD) or not (NONHEAD). Lclusters are now
+fixed in block size, as illustrated below::
+
+          |<-    variable-sized extent    ->|<-       VLE         ->|
+        clusterofs                        clusterofs              clusterofs
+          |                                 |                       |
+ _________v_________________________________v_______________________v________
+ ... |    .         |              |        .     |              |  .   ...
+ ____|____._________|______________|________.___ _|______________|__.________
+     |-> lcluster <-|-> lcluster <-|-> lcluster <-|-> lcluster <-|
+          (HEAD)        (NONHEAD)       (HEAD)        (NONHEAD)    .
+           .             CBLKCNT            .                    .
+            .                               .                  .
+             .                              .                .
+       _______._____________________________.______________._________________
+          ... |              |              |              | ...
+       _______|______________|______________|______________|_________________
+              |->      big pcluster       <-|-> pcluster <-|
+
+A physical cluster can be seen as a container of physical compressed blocks
+which contains compressed data. Previously, only lcluster-sized (4KB) pclusters
+were supported. After the big pcluster feature was introduced (available since
+Linux v5.13), a pcluster can be a multiple of the lcluster size.
+
+For each HEAD lcluster, clusterofs is recorded to indicate where a new extent
+starts and blkaddr is used to seek the compressed data. For each NONHEAD
+lcluster, delta0 and delta1 are available instead of blkaddr to indicate the
+distance to its HEAD lcluster and the next HEAD lcluster. A PLAIN lcluster is
+also a HEAD lcluster except that its data is uncompressed. See the comments
+around "struct z_erofs_vle_decompressed_index" in erofs_fs.h for more details.
+
+If big pcluster is enabled, the pcluster size in lclusters needs to be recorded as
+well. Let the delta0 of the first NONHEAD lcluster store the compressed block
+count with a special flag, as a new kind of lcluster called CBLKCNT NONHEAD
+lcluster. It's easy to see that its delta0 is constantly 1, as illustrated below::
+
+   __________________________________________________________
+  | HEAD |  NONHEAD  | NONHEAD | ... | NONHEAD | HEAD | HEAD |
+  |__:___|_(CBLKCNT)_|_________|_____|_________|__:___|____:_|
+     |<----- a big pcluster (with CBLKCNT) ------>|<--  -->|
+           a lcluster-sized pcluster (without CBLKCNT) ^
+
+If another HEAD follows a HEAD lcluster, there is no room to record CBLKCNT,
+but it's easy to know that the size of such a pcluster is 1 lcluster as well.
diff --git a/Documentation/hwmon/tmp103.rst b/Documentation/hwmon/tmp103.rst

index e195a7d..b3ef814 100644 (file)
--- a/Documentation/hwmon/tmp103.rst
+++ b/Documentation/hwmon/tmp103.rst
@@ -21,10 +21,10 @@ Description
  The TMP103 is a digital output temperature sensor in a four-ball
  wafer chip-scale package (WCSP). The TMP103 is capable of reading
  temperatures to a resolution of 1°C. The TMP103 is specified for
-operation over a temperature range of –40°C to +125°C.
+operation over a temperature range of -40°C to +125°C.
  
  Resolution: 8 Bits
-Accuracy: ±1°C Typ (–10°C to +100°C)
+Accuracy: ±1°C Typ (-10°C to +100°C)
  
  The driver provides the common sysfs-interface for temperatures (see
  Documentation/hwmon/sysfs-interface.rst under Temperatures).
diff --git a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst

index 8a9b185..2d3f6bd 100644 (file)
--- a/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/i40e.rst
@@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the
  driver. To re-enable ATR, the sideband can be disabled with the ethtool -K
  option. For example::
  
-  ethtool –K [adapter] ntuple [off|on]
+  ethtool -K [adapter] ntuple [off|on]
  
  If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a
  TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is
@@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
  Totals must be equal or less than port speed.
  
  For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]``
  
  2. Enable HW TC offload on interface::
  
diff --git a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst

index 52e037b..25330b7 100644 (file)
--- a/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
+++ b/Documentation/networking/device_drivers/ethernet/intel/iavf.rst
@@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
  Totals must be equal or less than port speed.
  
  For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]``
  
  NOTE:
    Setting up channels via ethtool (ethtool -L) is not supported when the
diff --git a/Documentation/process/kernel-enforcement-statement.rst b/Documentation/process/kernel-enforcement-statement.rst

index e5a1be4..dc2d813 100644 (file)
--- a/Documentation/process/kernel-enforcement-statement.rst
+++ b/Documentation/process/kernel-enforcement-statement.rst
@@ -1,4 +1,4 @@
-.. _process_statement_kernel:
+.. _process_statement_kernel:
  
  Linux Kernel Enforcement Statement
  ----------------------------------
diff --git a/Documentation/security/tpm/xen-tpmfront.rst b/Documentation/security/tpm/xen-tpmfront.rst

index 00d5b1d..31c6752 100644 (file)
--- a/Documentation/security/tpm/xen-tpmfront.rst
+++ b/Documentation/security/tpm/xen-tpmfront.rst
@@ -1,4 +1,4 @@
-=============================
+=============================
  Virtual TPM interface for Xen
  =============================
  
diff --git a/Documentation/timers/no_hz.rst b/Documentation/timers/no_hz.rst

index c4c70e1..6cadad7 100644 (file)
--- a/Documentation/timers/no_hz.rst
+++ b/Documentation/timers/no_hz.rst
@@ -1,4 +1,4 @@
-======================================
+======================================
  NO_HZ: Reducing Scheduling-Clock Ticks
  ======================================
  
diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs

deleted file mode 100644 (file)

index 2d0fffd..0000000
--- a/Documentation/translations/zh_CN/SecurityBugs
+++ /dev/null
@@ -1,50 +0,0 @@
-Chinese translated version of Documentation/admin-guide/security-bugs.rst
-
-If you have any comment or update to the content, please contact the
-original document maintainer directly.  However, if you have a problem
-communicating in English you can also ask the Chinese maintainer for
-help.  Contact the Chinese maintainer if this translation is outdated
-or if there is a problem with the translation.
-
-Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
----------------------------------------------------------------------
-Documentation/admin-guide/security-bugs.rst 的中文翻译
-
-如果想评论或更新本文的内容，请直接联系原文档的维护者。如果你使用英文
-交流有困难的话，也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-译存在问题，请联系中文版维护者。
-
-中文版维护者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版翻译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版校译者： 贾威威 Harry Wei <harryxiyou@gmail.com>
-
-
-以下为正文
----------------------------------------------------------------------
-Linux内核开发者认为安全非常重要。因此，我们想要知道当一个有关于
-安全的漏洞被发现的时候，并且它可能会被尽快的修复或者公开。请把这个安全
-漏洞报告给Linux内核安全团队。
-
-1) 联系
-
-linux内核安全团队可以通过email<security@kernel.org>来联系。这是
-一组独立的安全工作人员，可以帮助改善漏洞报告并且公布和取消一个修复。安
-全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
-当遇到任何漏洞，所能提供的信息越多就越能诊断和修复。如果你不清楚什么
-是有帮助的信息，那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任
-何攻击性的代码都是非常有用的，未经报告者的同意不会被取消，除非它已经
-被公布于众。
-
-2) 公开
-
-Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直
-到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理
-解，解决方案没有通过测试或者供应商协调，可以合理地延迟公开。然而，我们
-期望这些延迟尽可能的短些，是可数的几天，而不是几个星期或者几个月。公开
-日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很
-短（特殊的，它已经被公众所知道）到几个星期。作为一个基本的默认政策，我
-们所期望通知公众的日期是7天的安排。
-
-3) 保密协议
-
-Linux内核安全团队不是一个正式的团体，因此不能加入任何的保密协议。
diff --git a/Documentation/usb/mtouchusb.rst b/Documentation/usb/mtouchusb.rst

index d1111b7..5ae1f74 100644 (file)
--- a/Documentation/usb/mtouchusb.rst
+++ b/Documentation/usb/mtouchusb.rst
@@ -1,4 +1,4 @@
-================
+================
  mtouchusb driver
  ================
  
diff --git a/Documentation/usb/usb-serial.rst b/Documentation/usb/usb-serial.rst

index 8fa7dbd..69586ae 100644 (file)
--- a/Documentation/usb/usb-serial.rst
+++ b/Documentation/usb/usb-serial.rst
@@ -1,4 +1,4 @@
-==========
+==========
  USB serial
  ==========
  
diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst

index 5ec8a19..5c081c8 100644 (file)
--- a/Documentation/virt/kvm/amd-memory-encryption.rst
+++ b/Documentation/virt/kvm/amd-memory-encryption.rst
@@ -22,7 +22,7 @@ to SEV::
                   [ecx]:
                         Bits[31:0]  Number of encrypted guests supported simultaneously
  
-If support for SEV is present, MSR 0xc001_0010 (MSR_K8_SYSCFG) and MSR 0xc001_0015
+If support for SEV is present, MSR 0xc001_0010 (MSR_AMD64_SYSCFG) and MSR 0xc001_0015
  (MSR_K7_HWCR) can be used to determine if it can be enabled::
  
         0xc001_0010:
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst

index 22d0775..7fcb2fd 100644 (file)
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
  4.126 KVM_X86_SET_MSR_FILTER
  ----------------------------
  
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
  :Architectures: x86
  :Type: vm ioctl
  :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
  instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
  KVM_EXIT_X86_WRMSR exit notifications.
  
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
  ---------------------------
  
  :Architectures: x86
diff --git a/Documentation/x86/amd-memory-encryption.rst b/Documentation/x86/amd-memory-encryption.rst

index c48d452..a1940eb 100644 (file)
--- a/Documentation/x86/amd-memory-encryption.rst
+++ b/Documentation/x86/amd-memory-encryption.rst
@@ -53,7 +53,7 @@ CPUID function 0x8000001f reports information related to SME::
                            system physical addresses, not guest physical
                            addresses)
  
-If support for SME is present, MSR 0xc00100010 (MSR_K8_SYSCFG) can be used to
+If support for SME is present, MSR 0xc0010010 (MSR_AMD64_SYSCFG) can be used to
  determine if SME is enabled and/or to enable memory encryption::
  
         0xc0010010:
@@ -79,7 +79,7 @@ The state of SME in the Linux kernel can be documented as follows:
           The CPU supports SME (determined through CPUID instruction).
  
         - Enabled:
-         Supported and bit 23 of MSR_K8_SYSCFG is set.
+         Supported and bit 23 of MSR_AMD64_SYSCFG is set.
  
         - Active:
           Supported, Enabled and the Linux kernel is actively applying
@@ -89,7 +89,7 @@ The state of SME in the Linux kernel can be documented as follows:
  SME can also be enabled and activated in the BIOS. If SME is enabled and
  activated in the BIOS, then all memory accesses will be encrypted and it will
  not be necessary to activate the Linux memory encryption support.  If the BIOS
-merely enables SME (sets bit 23 of the MSR_K8_SYSCFG), then Linux can activate
+merely enables SME (sets bit 23 of the MSR_AMD64_SYSCFG), then Linux can activate
  memory encryption by default (CONFIG_AMD_MEM_ENCRYPT_ACTIVE_BY_DEFAULT=y) or
  by supplying mem_encrypt=on on the kernel command line.  However, if BIOS does
  not enable SME, then Linux will not be able to activate memory encryption, even
diff --git a/MAINTAINERS b/MAINTAINERS

index bd7aff0..008fcad 100644 (file)
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1578,7 +1578,7 @@ F:        drivers/clk/sunxi/
  ARM/Allwinner sunXi SoC support
  M:     Maxime Ripard <mripard@kernel.org>
  M:     Chen-Yu Tsai <wens@csie.org>
-R:     Jernej Skrabec <jernej.skrabec@siol.net>
+R:     Jernej Skrabec <jernej.skrabec@gmail.com>
  L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:     Maintained
  T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sunxi/linux.git
@@ -5089,7 +5089,7 @@ S:        Maintained
  F:     drivers/net/fddi/defza.*
  
  DEINTERLACE DRIVERS FOR ALLWINNER H3
-M:     Jernej Skrabec <jernej.skrabec@siol.net>
+M:     Jernej Skrabec <jernej.skrabec@gmail.com>
  L:     linux-media@vger.kernel.org
  S:     Maintained
  T:     git git://linuxtv.org/media_tree.git
@@ -5237,7 +5237,7 @@ DEVICE DIRECT ACCESS (DAX)
  M:     Dan Williams <dan.j.williams@intel.com>
  M:     Vishal Verma <vishal.l.verma@intel.com>
  M:     Dave Jiang <dave.jiang@intel.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  F:     drivers/dax/
  
@@ -5632,14 +5632,14 @@ F:      include/linux/power/smartreflex.h
  DRM DRIVER FOR ALLWINNER DE2 AND DE3 ENGINE
  M:     Maxime Ripard <mripard@kernel.org>
  M:     Chen-Yu Tsai <wens@csie.org>
-R:     Jernej Skrabec <jernej.skrabec@siol.net>
+R:     Jernej Skrabec <jernej.skrabec@gmail.com>
  L:     dri-devel@lists.freedesktop.org
  S:     Supported
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  F:     drivers/gpu/drm/sun4i/sun8i*
  
  DRM DRIVER FOR ARM PL111 CLCD
-M:     Eric Anholt <eric@anholt.net>
+M:     Emma Anholt <emma@anholt.net>
  S:     Supported
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  F:     drivers/gpu/drm/pl111/
@@ -5719,7 +5719,7 @@ T:        git git://anongit.freedesktop.org/drm/drm-misc
  F:     drivers/gpu/drm/tiny/gm12u320.c
  
  DRM DRIVER FOR HX8357D PANELS
-M:     Eric Anholt <eric@anholt.net>
+M:     Emma Anholt <emma@anholt.net>
  S:     Maintained
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  F:     Documentation/devicetree/bindings/display/himax,hx8357d.txt
@@ -6023,7 +6023,7 @@ M:        Neil Armstrong <narmstrong@baylibre.com>
  M:     Robert Foss <robert.foss@linaro.org>
  R:     Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
  R:     Jonas Karlman <jonas@kwiboo.se>
-R:     Jernej Skrabec <jernej.skrabec@siol.net>
+R:     Jernej Skrabec <jernej.skrabec@gmail.com>
  S:     Maintained
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  F:     drivers/gpu/drm/bridge/
@@ -6177,7 +6177,7 @@ F:        Documentation/devicetree/bindings/display/ti/
  F:     drivers/gpu/drm/omapdrm/
  
  DRM DRIVERS FOR V3D
-M:     Eric Anholt <eric@anholt.net>
+M:     Emma Anholt <emma@anholt.net>
  S:     Supported
  T:     git git://anongit.freedesktop.org/drm/drm-misc
  F:     Documentation/devicetree/bindings/gpu/brcm,bcm-v3d.yaml
@@ -6185,7 +6185,7 @@ F:        drivers/gpu/drm/v3d/
  F:     include/uapi/drm/v3d_drm.h
  
  DRM DRIVERS FOR VC4
-M:     Eric Anholt <eric@anholt.net>
+M:     Emma Anholt <emma@anholt.net>
  M:     Maxime Ripard <mripard@kernel.org>
  S:     Supported
  T:     git git://github.com/anholt/linux
@@ -7006,7 +7006,7 @@ M:        Dan Williams <dan.j.williams@intel.com>
  R:     Matthew Wilcox <willy@infradead.org>
  R:     Jan Kara <jack@suse.cz>
  L:     linux-fsdevel@vger.kernel.org
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  F:     fs/dax.c
  F:     include/linux/dax.h
@@ -10378,7 +10378,7 @@ LIBNVDIMM BLK: MMIO-APERTURE DRIVER
  M:     Dan Williams <dan.j.williams@intel.com>
  M:     Vishal Verma <vishal.l.verma@intel.com>
  M:     Dave Jiang <dave.jiang@intel.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
  P:     Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10389,7 +10389,7 @@ LIBNVDIMM BTT: BLOCK TRANSLATION TABLE
  M:     Vishal Verma <vishal.l.verma@intel.com>
  M:     Dan Williams <dan.j.williams@intel.com>
  M:     Dave Jiang <dave.jiang@intel.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
  P:     Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10399,7 +10399,7 @@ LIBNVDIMM PMEM: PERSISTENT MEMORY DRIVER
  M:     Dan Williams <dan.j.williams@intel.com>
  M:     Vishal Verma <vishal.l.verma@intel.com>
  M:     Dave Jiang <dave.jiang@intel.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
  P:     Documentation/nvdimm/maintainer-entry-profile.rst
@@ -10407,7 +10407,7 @@ F:      drivers/nvdimm/pmem*
  
  LIBNVDIMM: DEVICETREE BINDINGS
  M:     Oliver O'Halloran <oohall@gmail.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
  F:     Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -10418,7 +10418,7 @@ M:      Dan Williams <dan.j.williams@intel.com>
  M:     Vishal Verma <vishal.l.verma@intel.com>
  M:     Dave Jiang <dave.jiang@intel.com>
  M:     Ira Weiny <ira.weiny@intel.com>
-L:     linux-nvdimm@lists.01.org
+L:     nvdimm@lists.linux.dev
  S:     Supported
  Q:     https://patchwork.kernel.org/project/linux-nvdimm/list/
  P:     Documentation/nvdimm/maintainer-entry-profile.rst
@@ -15815,7 +15815,7 @@ F:      include/uapi/linux/rose.h
  F:     net/rose/
  
  ROTATION DRIVER FOR ALLWINNER A83T
-M:     Jernej Skrabec <jernej.skrabec@siol.net>
+M:     Jernej Skrabec <jernej.skrabec@gmail.com>
  L:     linux-media@vger.kernel.org
  S:     Maintained
  T:     git git://linuxtv.org/media_tree.git
diff --git a/arch/arc/Makefile b/arch/arc/Makefile

index 4392c9c..e47adc9 100644 (file)
--- a/arch/arc/Makefile
+++ b/arch/arc/Makefile
@@ -31,7 +31,7 @@ endif
  
  
  ifdef CONFIG_ARC_CURR_IN_REG
-# For a global register defintion, make sure it gets passed to every file
+# For a global register definition, make sure it gets passed to every file
  # We had a customer reported bug where some code built in kernel was NOT using
  # any kernel headers, and missing the r25 global register
  # Can't do unconditionally because of recursive include issues
diff --git a/arch/arc/include/asm/cmpxchg.h b/arch/arc/include/asm/cmpxchg.h

index 9b87e16..dfeffa2 100644 (file)
--- a/arch/arc/include/asm/cmpxchg.h
+++ b/arch/arc/include/asm/cmpxchg.h
@@ -116,7 +116,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
   *
   * Technically the lock is also needed for UP (boils down to irq save/restore)
   * but we can cheat a bit since cmpxchg() atomic_ops_lock() would cause irqs to
- * be disabled thus can't possibly be interrpted/preempted/clobbered by xchg()
+ * be disabled thus can't possibly be interrupted/preempted/clobbered by xchg()
   * Other way around, xchg is one instruction anyways, so can't be interrupted
   * as such
   */
@@ -143,7 +143,7 @@ static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
  /*
   * "atomic" variant of xchg()
   * REQ: It needs to follow the same serialization rules as other atomic_xxx()
- * Since xchg() doesn't always do that, it would seem that following defintion
+ * Since xchg() doesn't always do that, it would seem that following definition
   * is incorrect. But here's the rationale:
   *   SMP : Even xchg() takes the atomic_ops_lock, so OK.
   *   LLSC: atomic_ops_lock are not relevant at all (even if SMP, since LLSC
diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h

index ad9b7fe..4a9d333 100644 (file)
--- a/arch/arc/include/asm/page.h
+++ b/arch/arc/include/asm/page.h
@@ -7,6 +7,18 @@
  
  #include <uapi/asm/page.h>
  
+#ifdef CONFIG_ARC_HAS_PAE40
+
+#define MAX_POSSIBLE_PHYSMEM_BITS      40
+#define PAGE_MASK_PHYS                 (0xff00000000ull | PAGE_MASK)
+
+#else /* CONFIG_ARC_HAS_PAE40 */
+
+#define MAX_POSSIBLE_PHYSMEM_BITS      32
+#define PAGE_MASK_PHYS                 PAGE_MASK
+
+#endif /* CONFIG_ARC_HAS_PAE40 */
+
  #ifndef __ASSEMBLY__
  
  #define clear_page(paddr)              memset((paddr), 0, PAGE_SIZE)
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h

index 1636417..5878846 100644 (file)
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -107,8 +107,8 @@
  #define ___DEF (_PAGE_PRESENT | _PAGE_CACHEABLE)
  
  /* Set of bits not changed in pte_modify */
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL)
-
+#define _PAGE_CHG_MASK (PAGE_MASK_PHYS | _PAGE_ACCESSED | _PAGE_DIRTY | \
+                                                          _PAGE_SPECIAL)
  /* More Abbrevaited helpers */
  #define PAGE_U_NONE     __pgprot(___DEF)
  #define PAGE_U_R        __pgprot(___DEF | _PAGE_READ)
@@ -132,13 +132,7 @@
  #define PTE_BITS_IN_PD0                (_PAGE_GLOBAL | _PAGE_PRESENT | _PAGE_HW_SZ)
  #define PTE_BITS_RWX           (_PAGE_EXECUTE | _PAGE_WRITE | _PAGE_READ)
  
-#ifdef CONFIG_ARC_HAS_PAE40
-#define PTE_BITS_NON_RWX_IN_PD1        (0xff00000000 | PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 40
-#else
-#define PTE_BITS_NON_RWX_IN_PD1        (PAGE_MASK | _PAGE_CACHEABLE)
-#define MAX_POSSIBLE_PHYSMEM_BITS 32
-#endif
+#define PTE_BITS_NON_RWX_IN_PD1        (PAGE_MASK_PHYS | _PAGE_CACHEABLE)
  
  /**************************************************************************
   * Mapping of vm_flags (Generic VM) to PTE flags (arch specific)
diff --git a/arch/arc/include/uapi/asm/page.h b/arch/arc/include/uapi/asm/page.h

index 2a97e27..2a4ad61 100644 (file)
--- a/arch/arc/include/uapi/asm/page.h
+++ b/arch/arc/include/uapi/asm/page.h
@@ -33,5 +33,4 @@
  
  #define PAGE_MASK      (~(PAGE_SIZE-1))
  
-
  #endif /* _UAPI__ASM_ARC_PAGE_H */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S

index 1743506..2cb8dfe 100644 (file)
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -177,7 +177,7 @@ tracesys:
  
         ; Do the Sys Call as we normally would.
         ; Validate the Sys Call number
-       cmp     r8,  NR_syscalls
+       cmp     r8,  NR_syscalls - 1
         mov.hi  r0, -ENOSYS
         bhi     tracesys_exit
  
@@ -255,7 +255,7 @@ ENTRY(EV_Trap)
         ;============ Normal syscall case
  
         ; syscall num shd not exceed the total system calls avail
-       cmp     r8,  NR_syscalls
+       cmp     r8,  NR_syscalls - 1
         mov.hi  r0, -ENOSYS
         bhi     .Lret_from_system_call
  
diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c

index ecfbc42..345a000 100644 (file)
--- a/arch/arc/kernel/kgdb.c
+++ b/arch/arc/kernel/kgdb.c
@@ -140,6 +140,7 @@ int kgdb_arch_handle_exception(int e_vector, int signo, int err_code,
                 ptr = &remcomInBuffer[1];
                 if (kgdb_hex2long(&ptr, &addr))
                         regs->ret = addr;
+               fallthrough;
  
         case 'D':
         case 'k':
diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c

index d838d0d..3793876 100644 (file)
--- a/arch/arc/kernel/process.c
+++ b/arch/arc/kernel/process.c
@@ -50,14 +50,14 @@ SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new)
         int ret;
  
         /*
-        * This is only for old cores lacking LLOCK/SCOND, which by defintion
+        * This is only for old cores lacking LLOCK/SCOND, which by definition
          * can't possibly be SMP. Thus doesn't need to be SMP safe.
          * And this also helps reduce the overhead for serializing in
          * the UP case
          */
         WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP));
  
-       /* Z indicates to userspace if operation succeded */
+       /* Z indicates to userspace if operation succeeded */
         regs->status32 &= ~STATUS_Z_MASK;
  
         ret = access_ok(uaddr, sizeof(*uaddr));
@@ -107,7 +107,7 @@ fail:
  
  void arch_cpu_idle(void)
  {
-       /* Re-enable interrupts <= default irq priority before commiting SLEEP */
+       /* Re-enable interrupts <= default irq priority before committing SLEEP */
         const unsigned int arg = 0x10 | ARCV2_IRQ_DEF_PRIO;
  
         __asm__ __volatile__(
@@ -120,7 +120,7 @@ void arch_cpu_idle(void)
  
  void arch_cpu_idle(void)
  {
-       /* sleep, but enable both set E1/E2 (levels of interrutps) before committing */
+       /* sleep, but enable both set E1/E2 (levels of interrupts) before committing */
         __asm__ __volatile__("sleep 0x3 \n");
  }
  
diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c

index fdbe06c..b3ccb9e 100644 (file)
--- a/arch/arc/kernel/signal.c
+++ b/arch/arc/kernel/signal.c
@@ -259,7 +259,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
                 regs->r2 = (unsigned long)&sf->uc;
  
                 /*
-                * small optim to avoid unconditonally calling do_sigaltstack
+                * small optim to avoid unconditionally calling do_sigaltstack
                  * in sigreturn path, now that we only have rt_sigreturn
                  */
                 magic = MAGIC_SIGALTSTK;
@@ -391,7 +391,7 @@ void do_signal(struct pt_regs *regs)
  void do_notify_resume(struct pt_regs *regs)
  {
         /*
-        * ASM glue gaurantees that this is only called when returning to
+        * ASM glue guarantees that this is only called when returning to
          * user mode
          */
         if (test_thread_flag(TIF_NOTIFY_RESUME))
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c

index 33832e3..e2ed355 100644 (file)
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -157,7 +157,16 @@ void __init setup_arch_memory(void)
         min_high_pfn = PFN_DOWN(high_mem_start);
         max_high_pfn = PFN_DOWN(high_mem_start + high_mem_sz);
  
-       max_zone_pfn[ZONE_HIGHMEM] = min_low_pfn;
+       /*
+        * max_high_pfn should be ok here for both HIGHMEM and HIGHMEM+PAE.
+        * For HIGHMEM without PAE max_high_pfn should be less than
+        * min_low_pfn to guarantee that these two regions don't overlap.
+        * For PAE case highmem is greater than lowmem, so it is natural
+        * to use max_high_pfn.
+        *
+        * In both cases, holes should be handled by pfn_valid().
+        */
+       max_zone_pfn[ZONE_HIGHMEM] = max_high_pfn;
  
         high_memory = (void *)(min_high_pfn << PAGE_SHIFT);
  
diff --git a/arch/arc/mm/ioremap.c b/arch/arc/mm/ioremap.c

index fac4adc..95c649f 100644 (file)
--- a/arch/arc/mm/ioremap.c
+++ b/arch/arc/mm/ioremap.c
@@ -53,9 +53,10 @@ EXPORT_SYMBOL(ioremap);
  void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
                            unsigned long flags)
  {
+       unsigned int off;
         unsigned long vaddr;
         struct vm_struct *area;
-       phys_addr_t off, end;
+       phys_addr_t end;
         pgprot_t prot = __pgprot(flags);
  
         /* Don't allow wraparound, zero size */
@@ -72,7 +73,7 @@ void __iomem *ioremap_prot(phys_addr_t paddr, unsigned long size,
  
         /* Mappings have to be page-aligned */
         off = paddr & ~PAGE_MASK;
-       paddr &= PAGE_MASK;
+       paddr &= PAGE_MASK_PHYS;
         size = PAGE_ALIGN(end + 1) - paddr;
  
         /*
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c

index 9bb3c24..9c7c682 100644 (file)
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -576,7 +576,7 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
                       pte_t *ptep)
  {
         unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
-       phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK;
+       phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
         struct page *page = pfn_to_page(pte_pfn(*ptep));
  
         create_tlb(vma, vaddr, ptep);
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c

index f8f0746..a7e54a0 100644 (file)
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -135,24 +135,18 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order)
         return;
  }
  
-int xen_swiotlb_detect(void)
-{
-       if (!xen_domain())
-               return 0;
-       if (xen_feature(XENFEAT_direct_mapped))
-               return 1;
-       /* legacy case */
-       if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
-               return 1;
-       return 0;
-}
-
  static int __init xen_mm_init(void)
  {
         struct gnttab_cache_flush cflush;
+       int rc;
+
         if (!xen_swiotlb_detect())
                 return 0;
-       xen_swiotlb_init();
+
+       rc = xen_swiotlb_init();
+       /* we can work with the default swiotlb */
+       if (rc < 0 && rc != -EEXIST)
+               return rc;
  
         cflush.op = 0;
         cflush.a.dev_bus_addr = 0;
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile

index 7ef4447..b52481f 100644 (file)
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -175,6 +175,9 @@ vdso_install:
         $(if $(CONFIG_COMPAT_VDSO), \
                 $(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso32 $@)
  
+archprepare:
+       $(Q)$(MAKE) $(build)=arch/arm64/tools kapi
+
  # We use MRPROPER_FILES and CLEAN_FILES now
  archclean:
         $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild

index 07ac208..26889db 100644 (file)
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -5,3 +5,5 @@ generic-y += qrwlock.h
  generic-y += qspinlock.h
  generic-y += set_memory.h
  generic-y += user.h
+
+generated-y += cpucaps.h
diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h

deleted file mode 100644 (file)

index b0c5eda..0000000
--- a/arch/arm64/include/asm/cpucaps.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm64/include/asm/cpucaps.h
- *
- * Copyright (C) 2016 ARM Ltd.
- */
-#ifndef __ASM_CPUCAPS_H
-#define __ASM_CPUCAPS_H
-
-#define ARM64_WORKAROUND_CLEAN_CACHE           0
-#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE   1
-#define ARM64_WORKAROUND_845719                        2
-#define ARM64_HAS_SYSREG_GIC_CPUIF             3
-#define ARM64_HAS_PAN                          4
-#define ARM64_HAS_LSE_ATOMICS                  5
-#define ARM64_WORKAROUND_CAVIUM_23154          6
-#define ARM64_WORKAROUND_834220                        7
-#define ARM64_HAS_NO_HW_PREFETCH               8
-#define ARM64_HAS_VIRT_HOST_EXTN               11
-#define ARM64_WORKAROUND_CAVIUM_27456          12
-#define ARM64_HAS_32BIT_EL0                    13
-#define ARM64_SPECTRE_V3A                      14
-#define ARM64_HAS_CNP                          15
-#define ARM64_HAS_NO_FPSIMD                    16
-#define ARM64_WORKAROUND_REPEAT_TLBI           17
-#define ARM64_WORKAROUND_QCOM_FALKOR_E1003     18
-#define ARM64_WORKAROUND_858921                        19
-#define ARM64_WORKAROUND_CAVIUM_30115          20
-#define ARM64_HAS_DCPOP                                21
-#define ARM64_SVE                              22
-#define ARM64_UNMAP_KERNEL_AT_EL0              23
-#define ARM64_SPECTRE_V2                       24
-#define ARM64_HAS_RAS_EXTN                     25
-#define ARM64_WORKAROUND_843419                        26
-#define ARM64_HAS_CACHE_IDC                    27
-#define ARM64_HAS_CACHE_DIC                    28
-#define ARM64_HW_DBM                           29
-#define ARM64_SPECTRE_V4                       30
-#define ARM64_MISMATCHED_CACHE_TYPE            31
-#define ARM64_HAS_STAGE2_FWB                   32
-#define ARM64_HAS_CRC32                                33
-#define ARM64_SSBS                             34
-#define ARM64_WORKAROUND_1418040               35
-#define ARM64_HAS_SB                           36
-#define ARM64_WORKAROUND_SPECULATIVE_AT                37
-#define ARM64_HAS_ADDRESS_AUTH_ARCH            38
-#define ARM64_HAS_ADDRESS_AUTH_IMP_DEF         39
-#define ARM64_HAS_GENERIC_AUTH_ARCH            40
-#define ARM64_HAS_GENERIC_AUTH_IMP_DEF         41
-#define ARM64_HAS_IRQ_PRIO_MASKING             42
-#define ARM64_HAS_DCPODP                       43
-#define ARM64_WORKAROUND_1463225               44
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM    45
-#define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM   46
-#define ARM64_WORKAROUND_1542419               47
-#define ARM64_HAS_E0PD                         48
-#define ARM64_HAS_RNG                          49
-#define ARM64_HAS_AMU_EXTN                     50
-#define ARM64_HAS_ADDRESS_AUTH                 51
-#define ARM64_HAS_GENERIC_AUTH                 52
-#define ARM64_HAS_32BIT_EL1                    53
-#define ARM64_BTI                              54
-#define ARM64_HAS_ARMv8_4_TTL                  55
-#define ARM64_HAS_TLB_RANGE                    56
-#define ARM64_MTE                              57
-#define ARM64_WORKAROUND_1508412               58
-#define ARM64_HAS_LDAPR                                59
-#define ARM64_KVM_PROTECTED_MODE               60
-#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP     61
-#define ARM64_HAS_EPAN                         62
-
-#define ARM64_NCAPS                            63
-
-#endif /* __ASM_CPUCAPS_H */
diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c

index ac48516..6d44c02 100644 (file)
--- a/arch/arm64/mm/flush.c
+++ b/arch/arm64/mm/flush.c
@@ -55,8 +55,10 @@ void __sync_icache_dcache(pte_t pte)
  {
         struct page *page = pte_page(pte);
  
-       if (!test_and_set_bit(PG_dcache_clean, &page->flags))
+       if (!test_bit(PG_dcache_clean, &page->flags)) {
                 sync_icache_aliases(page_address(page), page_size(page));
+               set_bit(PG_dcache_clean, &page->flags);
+       }
  }
  EXPORT_SYMBOL_GPL(__sync_icache_dcache);
  
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c

index 16a2b2b..e55409c 100644 (file)
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -43,6 +43,7 @@
  #include <linux/sizes.h>
  #include <asm/tlb.h>
  #include <asm/alternative.h>
+#include <asm/xen/swiotlb-xen.h>
  
  /*
   * We need to be able to catch inadvertent references to memstart_addr
@@ -482,7 +483,7 @@ void __init mem_init(void)
         if (swiotlb_force == SWIOTLB_FORCE ||
             max_pfn > PFN_DOWN(arm64_dma_phys_limit))
                 swiotlb_init(1);
-       else
+       else if (!xen_swiotlb_detect())
                 swiotlb_force = SWIOTLB_NO_FORCE;
  
         set_max_mapnr(max_pfn - PHYS_PFN_OFFSET);
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S

index 0a48191..97d7bcd 100644 (file)
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -447,6 +447,18 @@ SYM_FUNC_START(__cpu_setup)
         mov     x10, #(SYS_GCR_EL1_RRND | SYS_GCR_EL1_EXCL_MASK)
         msr_s   SYS_GCR_EL1, x10
  
+       /*
+        * If GCR_EL1.RRND=1 is implemented the same way as RRND=0, then
+        * RGSR_EL1.SEED must be non-zero for IRG to produce
+        * pseudorandom numbers. As RGSR_EL1 is UNKNOWN out of reset, we
+        * must initialize it.
+        */
+       mrs     x10, CNTVCT_EL0
+       ands    x10, x10, #SYS_RGSR_EL1_SEED_MASK
+       csinc   x10, x10, xzr, ne
+       lsl     x10, x10, #SYS_RGSR_EL1_SEED_SHIFT
+       msr_s   SYS_RGSR_EL1, x10
+
         /* clear any pending tag check faults in TFSR*_EL1 */
         msr_s   SYS_TFSR_EL1, xzr
         msr_s   SYS_TFSRE0_EL1, xzr
diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile

new file mode 100644 (file)

index 0000000..932b4fe
--- /dev/null
+++ b/arch/arm64/tools/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+
+gen := arch/$(ARCH)/include/generated
+kapi := $(gen)/asm
+
+kapi-hdrs-y := $(kapi)/cpucaps.h
+
+targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y))
+
+PHONY += kapi
+
+kapi:   $(kapi-hdrs-y) $(gen-y)
+
+# Create output directory if not already present
+_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
+
+quiet_cmd_gen_cpucaps = GEN     $@
+      cmd_gen_cpucaps = mkdir -p $(dir $@) && \
+                     $(AWK) -f $(filter-out $(PHONY),$^) > $@
+
+$(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE
+       $(call if_changed,gen_cpucaps)
diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps

new file mode 100644 (file)

index 0000000..21fbdda
--- /dev/null
+++ b/arch/arm64/tools/cpucaps
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Internal CPU capabilities constants, keep this list sorted
+
+BTI
+HAS_32BIT_EL0
+HAS_32BIT_EL1
+HAS_ADDRESS_AUTH
+HAS_ADDRESS_AUTH_ARCH
+HAS_ADDRESS_AUTH_IMP_DEF
+HAS_AMU_EXTN
+HAS_ARMv8_4_TTL
+HAS_CACHE_DIC
+HAS_CACHE_IDC
+HAS_CNP
+HAS_CRC32
+HAS_DCPODP
+HAS_DCPOP
+HAS_E0PD
+HAS_EPAN
+HAS_GENERIC_AUTH
+HAS_GENERIC_AUTH_ARCH
+HAS_GENERIC_AUTH_IMP_DEF
+HAS_IRQ_PRIO_MASKING
+HAS_LDAPR
+HAS_LSE_ATOMICS
+HAS_NO_FPSIMD
+HAS_NO_HW_PREFETCH
+HAS_PAN
+HAS_RAS_EXTN
+HAS_RNG
+HAS_SB
+HAS_STAGE2_FWB
+HAS_SYSREG_GIC_CPUIF
+HAS_TLB_RANGE
+HAS_VIRT_HOST_EXTN
+HW_DBM
+KVM_PROTECTED_MODE
+MISMATCHED_CACHE_TYPE
+MTE
+SPECTRE_V2
+SPECTRE_V3A
+SPECTRE_V4
+SSBS
+SVE
+UNMAP_KERNEL_AT_EL0
+WORKAROUND_834220
+WORKAROUND_843419
+WORKAROUND_845719
+WORKAROUND_858921
+WORKAROUND_1418040
+WORKAROUND_1463225
+WORKAROUND_1508412
+WORKAROUND_1542419
+WORKAROUND_CAVIUM_23154
+WORKAROUND_CAVIUM_27456
+WORKAROUND_CAVIUM_30115
+WORKAROUND_CAVIUM_TX2_219_PRFM
+WORKAROUND_CAVIUM_TX2_219_TVM
+WORKAROUND_CLEAN_CACHE
+WORKAROUND_DEVICE_LOAD_ACQUIRE
+WORKAROUND_NVIDIA_CARMEL_CNP
+WORKAROUND_QCOM_FALKOR_E1003
+WORKAROUND_REPEAT_TLBI
+WORKAROUND_SPECULATIVE_AT
diff --git a/arch/arm64/tools/gen-cpucaps.awk b/arch/arm64/tools/gen-cpucaps.awk

new file mode 100755 (executable)

index 0000000..00c9e72
--- /dev/null
+++ b/arch/arm64/tools/gen-cpucaps.awk
@@ -0,0 +1,40 @@
+#!/bin/awk -f
+# SPDX-License-Identifier: GPL-2.0
+# gen-cpucaps.awk: arm64 cpucaps header generator
+#
+# Usage: awk -f gen-cpucaps.awk cpucaps
+
+# Log an error and terminate
+function fatal(msg) {
+       print "Error at line " NR ": " msg > "/dev/stderr"
+       exit 1
+}
+
+# skip blank lines and comment lines
+/^$/ { next }
+/^#/ { next }
+
+BEGIN {
+       print "#ifndef __ASM_CPUCAPS_H"
+       print "#define __ASM_CPUCAPS_H"
+       print ""
+       print "/* Generated file - do not edit */"
+       cap_num = 0
+       print ""
+}
+
+/^[vA-Z0-9_]+$/ {
+       printf("#define ARM64_%-30s\t%d\n", $0, cap_num++)
+       next
+}
+
+END {
+       printf("#define ARM64_NCAPS\t\t\t\t%d\n", cap_num)
+       print ""
+       print "#endif /* __ASM_CPUCAPS_H */"
+}
+
+# Any lines not handled by previous rules are unexpected
+{
+       fatal("unhandled statement")
+}
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h

index 4430509..e3b29ed 100644 (file)
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -448,6 +448,9 @@
   */
  long plpar_hcall_norets(unsigned long opcode, ...);
  
+/* Variant which does not do hcall tracing */
+long plpar_hcall_norets_notrace(unsigned long opcode, ...);
+
  /**
   * plpar_hcall: - Make a pseries hypervisor call
   * @opcode: The hypervisor call to make.
diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h

index 44cde2e..59f7044 100644 (file)
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -153,8 +153,6 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup
   */
  static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state)
  {
-       if (user_mode(regs))
-               kuep_unlock();
  }
  
  static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state)
@@ -222,6 +220,13 @@ static inline void interrupt_nmi_enter_prepare(struct pt_regs *regs, struct inte
         local_paca->irq_soft_mask = IRQS_ALL_DISABLED;
         local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
  
+       if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !(regs->msr & MSR_PR) &&
+                               regs->nip < (unsigned long)__end_interrupts) {
+               // Kernel code running below __end_interrupts is
+               // implicitly soft-masked.
+               regs->softe = IRQS_ALL_DISABLED;
+       }
+
         /* Don't do any per-CPU operations until interrupt state is fixed */
  
         if (nmi_disables_ftrace(regs)) {
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h

index 5d1726b..bcb7b5f 100644 (file)
--- a/arch/powerpc/include/asm/paravirt.h
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -28,19 +28,35 @@ static inline u32 yield_count_of(int cpu)
         return be32_to_cpu(yield_count);
  }
  
+/*
+ * Spinlock code confers and prods, so don't trace the hcalls because the
+ * tracing code takes spinlocks which can cause recursion deadlocks.
+ *
+ * These calls are made while the lock is not held (the lock slowpath yields if
+ * it cannot acquire the lock, and the unlock slowpath might prod if a waiter
+ * has yielded). So this may not be a problem for simple spin locks because the
+ * tracing does not technically recurse on the lock, but we avoid it anyway.
+ *
+ * However the queued spin lock contended path is more strictly ordered: the
+ * H_CONFER hcall is made after the task has queued itself on the lock, so then
+ * recursing on that lock will cause the task to then queue up again behind the
+ * first instance (or worse: queued spinlocks use tricks that assume a context
+ * never waits on more than one spinlock, so such recursion may cause random
+ * corruption in the lock code).
+ */
  static inline void yield_to_preempted(int cpu, u32 yield_count)
  {
-       plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+       plpar_hcall_norets_notrace(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
  }
  
  static inline void prod_cpu(int cpu)
  {
-       plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+       plpar_hcall_norets_notrace(H_PROD, get_hard_smp_processor_id(cpu));
  }
  
  static inline void yield_to_any(void)
  {
-       plpar_hcall_norets(H_CONFER, -1, 0);
+       plpar_hcall_norets_notrace(H_CONFER, -1, 0);
  }
  #else
  static inline bool is_shared_processor(void)
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h

index ece84a4..83e0f70 100644 (file)
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -28,7 +28,11 @@ static inline void set_cede_latency_hint(u8 latency_hint)
  
  static inline long cede_processor(void)
  {
-       return plpar_hcall_norets(H_CEDE);
+       /*
+        * We cannot call tracepoints inside RCU idle regions which
+        * means we must not trace H_CEDE.
+        */
+       return plpar_hcall_norets_notrace(H_CEDE);
  }
  
  static inline long extended_cede_processor(unsigned long latency_hint)
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h

index a09e424..22c79ab 100644 (file)
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -157,7 +157,7 @@ do {                                                                \
                 "2:     lwz%X1 %L0, %L1\n"                      \
                 EX_TABLE(1b, %l2)                               \
                 EX_TABLE(2b, %l2)                               \
-               : "=r" (x)                                      \
+               : "=&r" (x)                                     \
                 : "m" (*addr)                                   \
                 :                                               \
                 : label)
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S

index 7c3654b..f1ae710 100644 (file)
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -340,6 +340,12 @@ ret_from_mc_except:
         andi.   r10,r10,IRQS_DISABLED;  /* yes -> go out of line */ \
         bne     masked_interrupt_book3e_##n
  
+/*
+ * Additional regs must be re-loaded from paca before EXCEPTION_COMMON* is
+ * called, because that does SAVE_NVGPRS which must see the original register
+ * values, otherwise the scratch values might be restored when exiting the
+ * interrupt.
+ */
  #define PROLOG_ADDITION_2REGS_GEN(n)                                       \
         std     r14,PACA_EXGEN+EX_R14(r13);                                 \
         std     r15,PACA_EXGEN+EX_R15(r13)
@@ -535,6 +541,10 @@ __end_interrupts:
                                 PROLOG_ADDITION_2REGS)
         mfspr   r14,SPRN_DEAR
         mfspr   r15,SPRN_ESR
+       std     r14,_DAR(r1)
+       std     r15,_DSISR(r1)
+       ld      r14,PACA_EXGEN+EX_R14(r13)
+       ld      r15,PACA_EXGEN+EX_R15(r13)
         EXCEPTION_COMMON(0x300)
         b       storage_fault_common
  
@@ -544,6 +554,10 @@ __end_interrupts:
                                 PROLOG_ADDITION_2REGS)
         li      r15,0
         mr      r14,r10
+       std     r14,_DAR(r1)
+       std     r15,_DSISR(r1)
+       ld      r14,PACA_EXGEN+EX_R14(r13)
+       ld      r15,PACA_EXGEN+EX_R15(r13)
         EXCEPTION_COMMON(0x400)
         b       storage_fault_common
  
@@ -557,6 +571,10 @@ __end_interrupts:
                                 PROLOG_ADDITION_2REGS)
         mfspr   r14,SPRN_DEAR
         mfspr   r15,SPRN_ESR
+       std     r14,_DAR(r1)
+       std     r15,_DSISR(r1)
+       ld      r14,PACA_EXGEN+EX_R14(r13)
+       ld      r15,PACA_EXGEN+EX_R15(r13)
         EXCEPTION_COMMON(0x600)
         b       alignment_more  /* no room, go out of line */
  
@@ -565,10 +583,10 @@ __end_interrupts:
         NORMAL_EXCEPTION_PROLOG(0x700, BOOKE_INTERRUPT_PROGRAM,
                                 PROLOG_ADDITION_1REG)
         mfspr   r14,SPRN_ESR
-       EXCEPTION_COMMON(0x700)
         std     r14,_DSISR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
         ld      r14,PACA_EXGEN+EX_R14(r13)
+       EXCEPTION_COMMON(0x700)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
         bl      program_check_exception
         REST_NVGPRS(r1)
         b       interrupt_return
@@ -725,11 +743,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
          * normal exception
          */
         mfspr   r14,SPRN_DBSR
-       EXCEPTION_COMMON_CRIT(0xd00)
         std     r14,_DSISR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
         ld      r14,PACA_EXCRIT+EX_R14(r13)
         ld      r15,PACA_EXCRIT+EX_R15(r13)
+       EXCEPTION_COMMON_CRIT(0xd00)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
         bl      DebugException
         REST_NVGPRS(r1)
         b       interrupt_return
@@ -796,11 +814,11 @@ kernel_dbg_exc:
          * normal exception
          */
         mfspr   r14,SPRN_DBSR
-       EXCEPTION_COMMON_DBG(0xd08)
         std     r14,_DSISR(r1)
-       addi    r3,r1,STACK_FRAME_OVERHEAD
         ld      r14,PACA_EXDBG+EX_R14(r13)
         ld      r15,PACA_EXDBG+EX_R15(r13)
+       EXCEPTION_COMMON_DBG(0xd08)
+       addi    r3,r1,STACK_FRAME_OVERHEAD
         bl      DebugException
         REST_NVGPRS(r1)
         b       interrupt_return
@@ -931,11 +949,7 @@ masked_interrupt_book3e_0x2c0:
   * original values stashed away in the PACA
   */
  storage_fault_common:
-       std     r14,_DAR(r1)
-       std     r15,_DSISR(r1)
         addi    r3,r1,STACK_FRAME_OVERHEAD
-       ld      r14,PACA_EXGEN+EX_R14(r13)
-       ld      r15,PACA_EXGEN+EX_R15(r13)
         bl      do_page_fault
         b       interrupt_return
  
@@ -944,11 +958,7 @@ storage_fault_common:
   * continues here.
   */
  alignment_more:
-       std     r14,_DAR(r1)
-       std     r15,_DSISR(r1)
         addi    r3,r1,STACK_FRAME_OVERHEAD
-       ld      r14,PACA_EXGEN+EX_R14(r13)
-       ld      r15,PACA_EXGEN+EX_R15(r13)
         bl      alignment_exception
         REST_NVGPRS(r1)
         b       interrupt_return
diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c

index e4559f8..e0938ba 100644 (file)
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -34,9 +34,6 @@ notrace long system_call_exception(long r3, long r4, long r5,
         syscall_fn f;
  
         kuep_lock();
-#ifdef CONFIG_PPC32
-       kuap_save_and_lock(regs);
-#endif
  
         regs->orig_gpr3 = r3;
  
@@ -427,6 +424,7 @@ again:
  
         /* Restore user access locks last */
         kuap_user_restore(regs);
+       kuep_unlock();
  
         return ret;
  }
diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c

index 8b2c1a8..cfc03e0 100644 (file)
--- a/arch/powerpc/kernel/legacy_serial.c
+++ b/arch/powerpc/kernel/legacy_serial.c
@@ -356,13 +356,16 @@ static void __init setup_legacy_serial_console(int console)
  
  static int __init ioremap_legacy_serial_console(void)
  {
-       struct legacy_serial_info *info = &legacy_serial_infos[legacy_serial_console];
-       struct plat_serial8250_port *port = &legacy_serial_ports[legacy_serial_console];
+       struct plat_serial8250_port *port;
+       struct legacy_serial_info *info;
         void __iomem *vaddr;
  
         if (legacy_serial_console < 0)
                 return 0;
  
+       info = &legacy_serial_infos[legacy_serial_console];
+       port = &legacy_serial_ports[legacy_serial_console];
+
         if (!info->early_addr)
                 return 0;
  
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h

index f4aafa3..1f07317 100644 (file)
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -166,9 +166,9 @@ copy_ckfpr_from_user(struct task_struct *task, void __user *from)
  }
  #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
  #else
-#define unsafe_copy_fpr_to_user(to, task, label) do { } while (0)
+#define unsafe_copy_fpr_to_user(to, task, label) do { if (0) goto label;} while (0)
  
-#define unsafe_copy_fpr_from_user(task, from, label) do { } while (0)
+#define unsafe_copy_fpr_from_user(task, from, label) do { if (0) goto label;} while (0)
  
  static inline unsigned long
  copy_fpr_to_user(void __user *to, struct task_struct *task)
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c

index 2d9193c..c63e263 100644 (file)
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -840,7 +840,7 @@ bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
                         kvm_unmap_radix(kvm, range->slot, gfn);
         } else {
                 for (gfn = range->start; gfn < range->end; gfn++)
-                       kvm_unmap_rmapp(kvm, range->slot, range->start);
+                       kvm_unmap_rmapp(kvm, range->slot, gfn);
         }
  
         return false;
diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c

index 1fd31b4..fe26f2f 100644 (file)
--- a/arch/powerpc/lib/feature-fixups.c
+++ b/arch/powerpc/lib/feature-fixups.c
@@ -14,6 +14,7 @@
  #include <linux/string.h>
  #include <linux/init.h>
  #include <linux/sched/mm.h>
+#include <linux/stop_machine.h>
  #include <asm/cputable.h>
  #include <asm/code-patching.h>
  #include <asm/page.h>
@@ -149,17 +150,17 @@ static void do_stf_entry_barrier_fixups(enum stf_barrier_type types)
  
                 pr_devel("patching dest %lx\n", (unsigned long)dest);
  
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-               if (types & STF_BARRIER_FALLBACK)
+               // See comment in do_entry_flush_fixups() RE order of patching
+               if (types & STF_BARRIER_FALLBACK) {
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
                         patch_branch((struct ppc_inst *)(dest + 1),
-                                    (unsigned long)&stf_barrier_fallback,
-                                    BRANCH_SET_LINK);
-               else
-                       patch_instruction((struct ppc_inst *)(dest + 1),
-                                         ppc_inst(instrs[1]));
-
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                                    (unsigned long)&stf_barrier_fallback, BRANCH_SET_LINK);
+               } else {
+                       patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               }
         }
  
         printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i,
@@ -227,11 +228,25 @@ static void do_stf_exit_barrier_fixups(enum stf_barrier_type types)
                                                            : "unknown");
  }
  
+static int __do_stf_barrier_fixups(void *data)
+{
+       enum stf_barrier_type *types = data;
+
+       do_stf_entry_barrier_fixups(*types);
+       do_stf_exit_barrier_fixups(*types);
+
+       return 0;
+}
  
  void do_stf_barrier_fixups(enum stf_barrier_type types)
  {
-       do_stf_entry_barrier_fixups(types);
-       do_stf_exit_barrier_fixups(types);
+       /*
+        * The call to the fallback entry flush, and the fallback/sync-ori exit
+        * flush can not be safely patched in/out while other CPUs are executing
+        * them. So call __do_stf_barrier_fixups() on one CPU while all other CPUs
+        * spin in the stop machine core with interrupts hard disabled.
+        */
+       stop_machine(__do_stf_barrier_fixups, &types, NULL);
  }
  
  void do_uaccess_flush_fixups(enum l1d_flush_type types)
@@ -284,8 +299,9 @@ void do_uaccess_flush_fixups(enum l1d_flush_type types)
                                                 : "unknown");
  }
  
-void do_entry_flush_fixups(enum l1d_flush_type types)
+static int __do_entry_flush_fixups(void *data)
  {
+       enum l1d_flush_type types = *(enum l1d_flush_type *)data;
         unsigned int instrs[3], *dest;
         long *start, *end;
         int i;
@@ -309,6 +325,31 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
         if (types & L1D_FLUSH_MTTRIG)
                 instrs[i++] = 0x7c12dba6; /* mtspr TRIG2,r0 (SPR #882) */
  
+       /*
+        * If we're patching in or out the fallback flush we need to be careful about the
+        * order in which we patch instructions. That's because it's possible we could
+        * take a page fault after patching one instruction, so the sequence of
+        * instructions must be safe even in a half patched state.
+        *
+        * To make that work, when patching in the fallback flush we patch in this order:
+        *  - the mflr          (dest)
+        *  - the mtlr          (dest + 2)
+        *  - the branch        (dest + 1)
+        *
+        * That ensures the sequence is safe to execute at any point. In contrast if we
+        * patch the mtlr last, it's possible we could return from the branch and not
+        * restore LR, leading to a crash later.
+        *
+        * When patching out the fallback flush (either with nops or another flush type),
+        * we patch in this order:
+        *  - the branch        (dest + 1)
+        *  - the mtlr          (dest + 2)
+        *  - the mflr          (dest)
+        *
+        * Note we are protected by stop_machine() from other CPUs executing the code in a
+        * semi-patched state.
+        */
+
         start = PTRRELOC(&__start___entry_flush_fixup);
         end = PTRRELOC(&__stop___entry_flush_fixup);
         for (i = 0; start < end; start++, i++) {
@@ -316,15 +357,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
  
                 pr_devel("patching dest %lx\n", (unsigned long)dest);
  
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-               if (types == L1D_FLUSH_FALLBACK)
-                       patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&entry_flush_fallback,
-                                    BRANCH_SET_LINK);
-               else
+               if (types == L1D_FLUSH_FALLBACK) {
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_branch((struct ppc_inst *)(dest + 1),
+                                    (unsigned long)&entry_flush_fallback, BRANCH_SET_LINK);
+               } else {
                         patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               }
         }
  
         start = PTRRELOC(&__start___scv_entry_flush_fixup);
@@ -334,15 +376,16 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
  
                 pr_devel("patching dest %lx\n", (unsigned long)dest);
  
-               patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
-
-               if (types == L1D_FLUSH_FALLBACK)
-                       patch_branch((struct ppc_inst *)(dest + 1), (unsigned long)&scv_entry_flush_fallback,
-                                    BRANCH_SET_LINK);
-               else
+               if (types == L1D_FLUSH_FALLBACK) {
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_branch((struct ppc_inst *)(dest + 1),
+                                    (unsigned long)&scv_entry_flush_fallback, BRANCH_SET_LINK);
+               } else {
                         patch_instruction((struct ppc_inst *)(dest + 1), ppc_inst(instrs[1]));
-
-               patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)(dest + 2), ppc_inst(instrs[2]));
+                       patch_instruction((struct ppc_inst *)dest, ppc_inst(instrs[0]));
+               }
         }
  
  
@@ -354,6 +397,19 @@ void do_entry_flush_fixups(enum l1d_flush_type types)
                                                         : "ori type" :
                 (types &  L1D_FLUSH_MTTRIG)     ? "mttrig type"
                                                 : "unknown");
+
+       return 0;
+}
+
+void do_entry_flush_fixups(enum l1d_flush_type types)
+{
+       /*
+        * The call to the fallback flush can not be safely patched in/out while
+        * other CPUs are executing it. So call __do_entry_flush_fixups() on one
+        * CPU while all other CPUs spin in the stop machine core with interrupts
+        * hard disabled.
+        */
+       stop_machine(__do_entry_flush_fixups, &types, NULL);
  }
  
  void do_rfi_flush_fixups(enum l1d_flush_type types)
diff --git a/arch/powerpc/platforms/pseries/hvCall.S b/arch/powerpc/platforms/pseries/hvCall.S

index 2136e42..8a2b8d6 100644 (file)
--- a/arch/powerpc/platforms/pseries/hvCall.S
+++ b/arch/powerpc/platforms/pseries/hvCall.S
@@ -102,6 +102,16 @@ END_FTR_SECTION(0, 1);                                             \
  #define HCALL_BRANCH(LABEL)
  #endif
  
+_GLOBAL_TOC(plpar_hcall_norets_notrace)
+       HMT_MEDIUM
+
+       mfcr    r0
+       stw     r0,8(r1)
+       HVSC                            /* invoke the hypervisor */
+       lwz     r0,8(r1)
+       mtcrf   0xff,r0
+       blr                             /* return r3 = status */
+
  _GLOBAL_TOC(plpar_hcall_norets)
         HMT_MEDIUM
  
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c

index 1f3152a..dab356e 100644 (file)
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1829,30 +1829,28 @@ void hcall_tracepoint_unregfunc(void)
  #endif
  
  /*
- * Since the tracing code might execute hcalls we need to guard against
- * recursion. One example of this are spinlocks calling H_YIELD on
- * shared processor partitions.
+ * Keep track of hcall tracing depth and prevent recursion. Warn if any is
+ * detected because it may indicate a problem. This will not catch all
+ * problems with tracing code making hcalls, because the tracing might have
+ * been invoked from a non-hcall, so the first hcall could recurse into it
+ * without warning here, but this is better than nothing.
+ *
+ * Hcalls with specific problems being traced should use the _notrace
+ * plpar_hcall variants.
   */
  static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
  
  
-void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
+notrace void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
  {
         unsigned long flags;
         unsigned int *depth;
  
-       /*
-        * We cannot call tracepoints inside RCU idle regions which
-        * means we must not trace H_CEDE.
-        */
-       if (opcode == H_CEDE)
-               return;
-
         local_irq_save(flags);
  
         depth = this_cpu_ptr(&hcall_trace_depth);
  
-       if (*depth)
+       if (WARN_ON_ONCE(*depth))
                 goto out;
  
         (*depth)++;
@@ -1864,19 +1862,16 @@ out:
         local_irq_restore(flags);
  }
  
-void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
+notrace void __trace_hcall_exit(long opcode, long retval, unsigned long *retbuf)
  {
         unsigned long flags;
         unsigned int *depth;
  
-       if (opcode == H_CEDE)
-               return;
-
         local_irq_save(flags);
  
         depth = this_cpu_ptr(&hcall_trace_depth);
  
-       if (*depth)
+       if (*depth) /* Don't warn again on the way out */
                 goto out;
  
         (*depth)++;
diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c

index f5beecd..e76b221 100644 (file)
--- a/arch/sh/kernel/traps.c
+++ b/arch/sh/kernel/traps.c
@@ -180,7 +180,6 @@ static inline void arch_ftrace_nmi_exit(void) { }
  
  BUILD_TRAP_HANDLER(nmi)
  {
-       unsigned int cpu = smp_processor_id();
         TRAP_HANDLER_DECL;
  
         arch_ftrace_nmi_enter();
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile

index 6e5522a..431bf7f 100644 (file)
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -30,6 +30,7 @@ targets := vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma \
  
  KBUILD_CFLAGS := -m$(BITS) -O2
  KBUILD_CFLAGS += -fno-strict-aliasing -fPIE
+KBUILD_CFLAGS += -Wundef
  KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
  cflags-$(CONFIG_X86_32) := -march=i386
  cflags-$(CONFIG_X86_64) := -mcmodel=small -mno-red-zone
@@ -48,10 +49,10 @@ KBUILD_CFLAGS += $(call as-option,-Wa$(comma)-mrelax-relocations=no)
  KBUILD_CFLAGS += -include $(srctree)/include/linux/hidden.h
  KBUILD_CFLAGS += $(CLANG_FLAGS)
  
-# sev-es.c indirectly inludes inat-table.h which is generated during
+# sev.c indirectly includes inat-table.h which is generated during
  # compilation and stored in $(objtree). Add the directory to the includes so
  # that the compiler finds it even with out-of-tree builds (make O=/some/path).
-CFLAGS_sev-es.o += -I$(objtree)/arch/x86/lib/
+CFLAGS_sev.o += -I$(objtree)/arch/x86/lib/
  
  KBUILD_AFLAGS  := $(KBUILD_CFLAGS) -D__ASSEMBLY__
  GCOV_PROFILE := n
@@ -93,7 +94,7 @@ ifdef CONFIG_X86_64
         vmlinux-objs-y += $(obj)/idt_64.o $(obj)/idt_handlers_64.o
         vmlinux-objs-y += $(obj)/mem_encrypt.o
         vmlinux-objs-y += $(obj)/pgtable_64.o
-       vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev-es.o
+       vmlinux-objs-$(CONFIG_AMD_MEM_ENCRYPT) += $(obj)/sev.o
  endif
  
  vmlinux-objs-$(CONFIG_ACPI) += $(obj)/acpi.o
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c

index dde042f..743f13e 100644 (file)
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -172,7 +172,7 @@ void __puthex(unsigned long value)
         }
  }
  
-#if CONFIG_X86_NEED_RELOCS
+#ifdef CONFIG_X86_NEED_RELOCS
  static void handle_relocations(void *output, unsigned long output_len,
                                unsigned long virt_addr)
  {
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h

index e5612f0..3113925 100644 (file)
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -79,7 +79,7 @@ struct mem_vector {
         u64 size;
  };
  
-#if CONFIG_RANDOMIZE_BASE
+#ifdef CONFIG_RANDOMIZE_BASE
  /* kaslr.c */
  void choose_random_location(unsigned long input,
                             unsigned long input_size,
diff --git a/arch/x86/boot/compressed/sev-es.c b/arch/x86/boot/compressed/sev-es.c

deleted file mode 100644 (file)

index 82041bd..0000000
--- a/arch/x86/boot/compressed/sev-es.c
+++ /dev/null
@@ -1,206 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-/*
- * misc.h needs to be first because it knows how to include the other kernel
- * headers in the pre-decompression code in a way that does not break
- * compilation.
- */
-#include "misc.h"
-
-#include <asm/pgtable_types.h>
-#include <asm/sev-es.h>
-#include <asm/trapnr.h>
-#include <asm/trap_pf.h>
-#include <asm/msr-index.h>
-#include <asm/fpu/xcr.h>
-#include <asm/ptrace.h>
-#include <asm/svm.h>
-
-#include "error.h"
-
-struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
-struct ghcb *boot_ghcb;
-
-/*
- * Copy a version of this function here - insn-eval.c can't be used in
- * pre-decompression code.
- */
-static bool insn_has_rep_prefix(struct insn *insn)
-{
-       insn_byte_t p;
-       int i;
-
-       insn_get_prefixes(insn);
-
-       for_each_insn_prefix(insn, i, p) {
-               if (p == 0xf2 || p == 0xf3)
-                       return true;
-       }
-
-       return false;
-}
-
-/*
- * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
- * doesn't use segments.
- */
-static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
-{
-       return 0UL;
-}
-
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
-       unsigned long low, high;
-
-       asm volatile("rdmsr" : "=a" (low), "=d" (high) :
-                       "c" (MSR_AMD64_SEV_ES_GHCB));
-
-       return ((high << 32) | low);
-}
-
-static inline void sev_es_wr_ghcb_msr(u64 val)
-{
-       u32 low, high;
-
-       low  = val & 0xffffffffUL;
-       high = val >> 32;
-
-       asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
-                       "a"(low), "d" (high) : "memory");
-}
-
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
-{
-       char buffer[MAX_INSN_SIZE];
-       int ret;
-
-       memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
-
-       ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
-       if (ret < 0)
-               return ES_DECODE_FAILED;
-
-       return ES_OK;
-}
-
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
-                                  void *dst, char *buf, size_t size)
-{
-       memcpy(dst, buf, size);
-
-       return ES_OK;
-}
-
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
-                                 void *src, char *buf, size_t size)
-{
-       memcpy(buf, src, size);
-
-       return ES_OK;
-}
-
-#undef __init
-#undef __pa
-#define __init
-#define __pa(x)        ((unsigned long)(x))
-
-#define __BOOT_COMPRESSED
-
-/* Basic instruction decoding support needed */
-#include "../../lib/inat.c"
-#include "../../lib/insn.c"
-
-/* Include code for early handlers */
-#include "../../kernel/sev-es-shared.c"
-
-static bool early_setup_sev_es(void)
-{
-       if (!sev_es_negotiate_protocol())
-               sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
-
-       if (set_page_decrypted((unsigned long)&boot_ghcb_page))
-               return false;
-
-       /* Page is now mapped decrypted, clear it */
-       memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
-
-       boot_ghcb = &boot_ghcb_page;
-
-       /* Initialize lookup tables for the instruction decoder */
-       inat_init_tables();
-
-       return true;
-}
-
-void sev_es_shutdown_ghcb(void)
-{
-       if (!boot_ghcb)
-               return;
-
-       if (!sev_es_check_cpu_features())
-               error("SEV-ES CPU Features missing.");
-
-       /*
-        * GHCB Page must be flushed from the cache and mapped encrypted again.
-        * Otherwise the running kernel will see strange cache effects when
-        * trying to use that page.
-        */
-       if (set_page_encrypted((unsigned long)&boot_ghcb_page))
-               error("Can't map GHCB page encrypted");
-
-       /*
-        * GHCB page is mapped encrypted again and flushed from the cache.
-        * Mark it non-present now to catch bugs when #VC exceptions trigger
-        * after this point.
-        */
-       if (set_page_non_present((unsigned long)&boot_ghcb_page))
-               error("Can't unmap GHCB page");
-}
-
-bool sev_es_check_ghcb_fault(unsigned long address)
-{
-       /* Check whether the fault was on the GHCB page */
-       return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
-}
-
-void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
-{
-       struct es_em_ctxt ctxt;
-       enum es_result result;
-
-       if (!boot_ghcb && !early_setup_sev_es())
-               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-       vc_ghcb_invalidate(boot_ghcb);
-       result = vc_init_em_ctxt(&ctxt, regs, exit_code);
-       if (result != ES_OK)
-               goto finish;
-
-       switch (exit_code) {
-       case SVM_EXIT_RDTSC:
-       case SVM_EXIT_RDTSCP:
-               result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
-               break;
-       case SVM_EXIT_IOIO:
-               result = vc_handle_ioio(boot_ghcb, &ctxt);
-               break;
-       case SVM_EXIT_CPUID:
-               result = vc_handle_cpuid(boot_ghcb, &ctxt);
-               break;
-       default:
-               result = ES_UNSUPPORTED;
-               break;
-       }
-
-finish:
-       if (result == ES_OK)
-               vc_finish_insn(&ctxt);
-       else if (result != ES_RETRY)
-               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-}
diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c

new file mode 100644 (file)

index 0000000..670e998
--- /dev/null
+++ b/arch/x86/boot/compressed/sev.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+/*
+ * misc.h needs to be first because it knows how to include the other kernel
+ * headers in the pre-decompression code in a way that does not break
+ * compilation.
+ */
+#include "misc.h"
+
+#include <asm/pgtable_types.h>
+#include <asm/sev.h>
+#include <asm/trapnr.h>
+#include <asm/trap_pf.h>
+#include <asm/msr-index.h>
+#include <asm/fpu/xcr.h>
+#include <asm/ptrace.h>
+#include <asm/svm.h>
+
+#include "error.h"
+
+struct ghcb boot_ghcb_page __aligned(PAGE_SIZE);
+struct ghcb *boot_ghcb;
+
+/*
+ * Copy a version of this function here - insn-eval.c can't be used in
+ * pre-decompression code.
+ */
+static bool insn_has_rep_prefix(struct insn *insn)
+{
+       insn_byte_t p;
+       int i;
+
+       insn_get_prefixes(insn);
+
+       for_each_insn_prefix(insn, i, p) {
+               if (p == 0xf2 || p == 0xf3)
+                       return true;
+       }
+
+       return false;
+}
+
+/*
+ * Only a dummy for insn_get_seg_base() - Early boot-code is 64bit only and
+ * doesn't use segments.
+ */
+static unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx)
+{
+       return 0UL;
+}
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+       unsigned long low, high;
+
+       asm volatile("rdmsr" : "=a" (low), "=d" (high) :
+                       "c" (MSR_AMD64_SEV_ES_GHCB));
+
+       return ((high << 32) | low);
+}
+
+static inline void sev_es_wr_ghcb_msr(u64 val)
+{
+       u32 low, high;
+
+       low  = val & 0xffffffffUL;
+       high = val >> 32;
+
+       asm volatile("wrmsr" : : "c" (MSR_AMD64_SEV_ES_GHCB),
+                       "a"(low), "d" (high) : "memory");
+}
+
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+       char buffer[MAX_INSN_SIZE];
+       int ret;
+
+       memcpy(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+
+       ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
+       if (ret < 0)
+               return ES_DECODE_FAILED;
+
+       return ES_OK;
+}
+
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+                                  void *dst, char *buf, size_t size)
+{
+       memcpy(dst, buf, size);
+
+       return ES_OK;
+}
+
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+                                 void *src, char *buf, size_t size)
+{
+       memcpy(buf, src, size);
+
+       return ES_OK;
+}
+
+#undef __init
+#undef __pa
+#define __init
+#define __pa(x)        ((unsigned long)(x))
+
+#define __BOOT_COMPRESSED
+
+/* Basic instruction decoding support needed */
+#include "../../lib/inat.c"
+#include "../../lib/insn.c"
+
+/* Include code for early handlers */
+#include "../../kernel/sev-shared.c"
+
+static bool early_setup_sev_es(void)
+{
+       if (!sev_es_negotiate_protocol())
+               sev_es_terminate(GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED);
+
+       if (set_page_decrypted((unsigned long)&boot_ghcb_page))
+               return false;
+
+       /* Page is now mapped decrypted, clear it */
+       memset(&boot_ghcb_page, 0, sizeof(boot_ghcb_page));
+
+       boot_ghcb = &boot_ghcb_page;
+
+       /* Initialize lookup tables for the instruction decoder */
+       inat_init_tables();
+
+       return true;
+}
+
+void sev_es_shutdown_ghcb(void)
+{
+       if (!boot_ghcb)
+               return;
+
+       if (!sev_es_check_cpu_features())
+               error("SEV-ES CPU Features missing.");
+
+       /*
+        * GHCB Page must be flushed from the cache and mapped encrypted again.
+        * Otherwise the running kernel will see strange cache effects when
+        * trying to use that page.
+        */
+       if (set_page_encrypted((unsigned long)&boot_ghcb_page))
+               error("Can't map GHCB page encrypted");
+
+       /*
+        * GHCB page is mapped encrypted again and flushed from the cache.
+        * Mark it non-present now to catch bugs when #VC exceptions trigger
+        * after this point.
+        */
+       if (set_page_non_present((unsigned long)&boot_ghcb_page))
+               error("Can't unmap GHCB page");
+}
+
+bool sev_es_check_ghcb_fault(unsigned long address)
+{
+       /* Check whether the fault was on the GHCB page */
+       return ((address & PAGE_MASK) == (unsigned long)&boot_ghcb_page);
+}
+
+void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code)
+{
+       struct es_em_ctxt ctxt;
+       enum es_result result;
+
+       if (!boot_ghcb && !early_setup_sev_es())
+               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+       vc_ghcb_invalidate(boot_ghcb);
+       result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+       if (result != ES_OK)
+               goto finish;
+
+       switch (exit_code) {
+       case SVM_EXIT_RDTSC:
+       case SVM_EXIT_RDTSCP:
+               result = vc_handle_rdtsc(boot_ghcb, &ctxt, exit_code);
+               break;
+       case SVM_EXIT_IOIO:
+               result = vc_handle_ioio(boot_ghcb, &ctxt);
+               break;
+       case SVM_EXIT_CPUID:
+               result = vc_handle_cpuid(boot_ghcb, &ctxt);
+               break;
+       default:
+               result = ES_UNSUPPORTED;
+               break;
+       }
+
+finish:
+       if (result == ES_OK)
+               vc_finish_insn(&ctxt);
+       else if (result != ES_RETRY)
+               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+}
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h

index cbbcee0..55efbac 100644 (file)
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -113,6 +113,7 @@
  #define VALID_PAGE(x) ((x) != INVALID_PAGE)
  
  #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
  
  /* KVM Hugepage definitions for x86 */
  #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
  
  #define KVM_NR_DB_REGS 4
  
+#define DR6_BUS_LOCK   (1 << 11)
  #define DR6_BD         (1 << 13)
  #define DR6_BS         (1 << 14)
  #define DR6_BT         (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
   * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
   */
  #define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE   0x0001e00f
+#define DR6_VOLATILE   0x0001e80f
  #define DR6_FIXED_1    (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
  
  #define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
         u32 pkru_mask;
  
         u64 *pae_root;
-       u64 *lm_root;
+       u64 *pml4_root;
  
         /*
          * check zero bits on shadow page table entries, these
@@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
         bool direct_map;
  };
  
+extern u32 __read_mostly kvm_nr_uret_msrs;
  extern u64 __read_mostly host_efer;
  extern bool __read_mostly allow_smaller_maxphyaddr;
  extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
                     unsigned long ipi_bitmap_high, u32 min,
                     unsigned long icr, int op_64_bit);
  
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
  int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
  
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+       return kvm_find_user_return_msr(msr) >= 0;
+}
+
  u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
  u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
  
diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h

index 3381198..6929987 100644 (file)
--- a/arch/x86/include/asm/kvm_para.h
+++ b/arch/x86/include/asm/kvm_para.h
@@ -7,8 +7,6 @@
  #include <linux/interrupt.h>
  #include <uapi/asm/kvm_para.h>
  
-extern void kvmclock_init(void);
-
  #ifdef CONFIG_KVM_GUEST
  bool kvm_check_and_clear_guest_paused(void);
  #else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
  }
  
  #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
  bool kvm_para_available(void);
  unsigned int kvm_arch_para_features(void);
  unsigned int kvm_arch_para_hints(void);
  void kvm_async_pf_task_wait_schedule(u32 token);
  void kvm_async_pf_task_wake(u32 token);
  u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
  bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
  
  DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
         return 0;
  }
  
-static inline void kvm_disable_steal_time(void)
-{
-       return;
-}
-
  static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
  {
         return false;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h

index 742d89a..211ba33 100644 (file)
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -537,9 +537,9 @@
  /* K8 MSRs */
  #define MSR_K8_TOP_MEM1                        0xc001001a
  #define MSR_K8_TOP_MEM2                        0xc001001d
-#define MSR_K8_SYSCFG                  0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT  23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT      BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG               0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT       23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT   BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
  #define MSR_K8_INT_PENDING_MSG         0xc0010055
  /* C1E active bits in int pending message */
  #define K8_INTP_C1E_ACTIVE_MASK                0x18000000
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h

index 154321d..556b2b1 100644 (file)
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -787,8 +787,10 @@ DECLARE_PER_CPU(u64, msr_misc_features_shadow);
  
  #ifdef CONFIG_CPU_SUP_AMD
  extern u32 amd_get_nodes_per_socket(void);
+extern u32 amd_get_highest_perf(void);
  #else
  static inline u32 amd_get_nodes_per_socket(void)       { return 0; }
+static inline u32 amd_get_highest_perf(void)           { return 0; }
  #endif
  
  static inline uint32_t hypervisor_cpuid_base(const char *sig, uint32_t leaves)
diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h

new file mode 100644 (file)

index 0000000..629c3df
--- /dev/null
+++ b/arch/x86/include/asm/sev-common.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD SEV header common between the guest and the hypervisor.
+ *
+ * Author: Brijesh Singh <brijesh.singh@amd.com>
+ */
+
+#ifndef __ASM_X86_SEV_COMMON_H
+#define __ASM_X86_SEV_COMMON_H
+
+#define GHCB_MSR_INFO_POS              0
+#define GHCB_MSR_INFO_MASK             (BIT_ULL(12) - 1)
+
+#define GHCB_MSR_SEV_INFO_RESP         0x001
+#define GHCB_MSR_SEV_INFO_REQ          0x002
+#define GHCB_MSR_VER_MAX_POS           48
+#define GHCB_MSR_VER_MAX_MASK          0xffff
+#define GHCB_MSR_VER_MIN_POS           32
+#define GHCB_MSR_VER_MIN_MASK          0xffff
+#define GHCB_MSR_CBIT_POS              24
+#define GHCB_MSR_CBIT_MASK             0xff
+#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)                           \
+       ((((u64)(_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |      \
+        (((u64)(_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |      \
+        (((u64)(_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |   \
+        GHCB_MSR_SEV_INFO_RESP)
+#define GHCB_MSR_INFO(v)               ((v) & 0xfffUL)
+#define GHCB_MSR_PROTO_MAX(v)          (((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
+#define GHCB_MSR_PROTO_MIN(v)          (((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)
+
+#define GHCB_MSR_CPUID_REQ             0x004
+#define GHCB_MSR_CPUID_RESP            0x005
+#define GHCB_MSR_CPUID_FUNC_POS                32
+#define GHCB_MSR_CPUID_FUNC_MASK       0xffffffff
+#define GHCB_MSR_CPUID_VALUE_POS       32
+#define GHCB_MSR_CPUID_VALUE_MASK      0xffffffff
+#define GHCB_MSR_CPUID_REG_POS         30
+#define GHCB_MSR_CPUID_REG_MASK                0x3
+#define GHCB_CPUID_REQ_EAX             0
+#define GHCB_CPUID_REQ_EBX             1
+#define GHCB_CPUID_REQ_ECX             2
+#define GHCB_CPUID_REQ_EDX             3
+#define GHCB_CPUID_REQ(fn, reg)                \
+               (GHCB_MSR_CPUID_REQ | \
+               (((unsigned long)(reg) & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
+               (((unsigned long)(fn)) << GHCB_MSR_CPUID_FUNC_POS))
+
+#define GHCB_MSR_TERM_REQ              0x100
+#define GHCB_MSR_TERM_REASON_SET_POS   12
+#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
+#define GHCB_MSR_TERM_REASON_POS       16
+#define GHCB_MSR_TERM_REASON_MASK      0xff
+#define GHCB_SEV_TERM_REASON(reason_set, reason_val)                                             \
+       (((((u64)(reason_set)) &  GHCB_MSR_TERM_REASON_SET_MASK) << GHCB_MSR_TERM_REASON_SET_POS) | \
+       ((((u64)(reason_val)) & GHCB_MSR_TERM_REASON_MASK) << GHCB_MSR_TERM_REASON_POS))
+
+#define GHCB_SEV_ES_REASON_GENERAL_REQUEST     0
+#define GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED        1
+
+#define GHCB_RESP_CODE(v)              ((v) & GHCB_MSR_INFO_MASK)
+
+#endif
diff --git a/arch/x86/include/asm/sev-es.h b/arch/x86/include/asm/sev-es.h

deleted file mode 100644 (file)

index cf1d957..0000000
--- a/arch/x86/include/asm/sev-es.h
+++ /dev/null
@@ -1,114 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#ifndef __ASM_ENCRYPTED_STATE_H
-#define __ASM_ENCRYPTED_STATE_H
-
-#include <linux/types.h>
-#include <asm/insn.h>
-
-#define GHCB_SEV_INFO          0x001UL
-#define GHCB_SEV_INFO_REQ      0x002UL
-#define                GHCB_INFO(v)            ((v) & 0xfffUL)
-#define                GHCB_PROTO_MAX(v)       (((v) >> 48) & 0xffffUL)
-#define                GHCB_PROTO_MIN(v)       (((v) >> 32) & 0xffffUL)
-#define                GHCB_PROTO_OUR          0x0001UL
-#define GHCB_SEV_CPUID_REQ     0x004UL
-#define                GHCB_CPUID_REQ_EAX      0
-#define                GHCB_CPUID_REQ_EBX      1
-#define                GHCB_CPUID_REQ_ECX      2
-#define                GHCB_CPUID_REQ_EDX      3
-#define                GHCB_CPUID_REQ(fn, reg) (GHCB_SEV_CPUID_REQ | \
-                                       (((unsigned long)reg & 3) << 30) | \
-                                       (((unsigned long)fn) << 32))
-
-#define        GHCB_PROTOCOL_MAX       0x0001UL
-#define GHCB_DEFAULT_USAGE     0x0000UL
-
-#define GHCB_SEV_CPUID_RESP    0x005UL
-#define GHCB_SEV_TERMINATE     0x100UL
-#define                GHCB_SEV_TERMINATE_REASON(reason_set, reason_val)       \
-                       (((((u64)reason_set) &  0x7) << 12) |           \
-                        ((((u64)reason_val) & 0xff) << 16))
-#define                GHCB_SEV_ES_REASON_GENERAL_REQUEST      0
-#define                GHCB_SEV_ES_REASON_PROTOCOL_UNSUPPORTED 1
-
-#define        GHCB_SEV_GHCB_RESP_CODE(v)      ((v) & 0xfff)
-#define        VMGEXIT()                       { asm volatile("rep; vmmcall\n\r"); }
-
-enum es_result {
-       ES_OK,                  /* All good */
-       ES_UNSUPPORTED,         /* Requested operation not supported */
-       ES_VMM_ERROR,           /* Unexpected state from the VMM */
-       ES_DECODE_FAILED,       /* Instruction decoding failed */
-       ES_EXCEPTION,           /* Instruction caused exception */
-       ES_RETRY,               /* Retry instruction emulation */
-};
-
-struct es_fault_info {
-       unsigned long vector;
-       unsigned long error_code;
-       unsigned long cr2;
-};
-
-struct pt_regs;
-
-/* ES instruction emulation context */
-struct es_em_ctxt {
-       struct pt_regs *regs;
-       struct insn insn;
-       struct es_fault_info fi;
-};
-
-void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
-
-static inline u64 lower_bits(u64 val, unsigned int bits)
-{
-       u64 mask = (1ULL << bits) - 1;
-
-       return (val & mask);
-}
-
-struct real_mode_header;
-enum stack_type;
-
-/* Early IDT entry points for #VC handler */
-extern void vc_no_ghcb(void);
-extern void vc_boot_ghcb(void);
-extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
-
-#ifdef CONFIG_AMD_MEM_ENCRYPT
-extern struct static_key_false sev_es_enable_key;
-extern void __sev_es_ist_enter(struct pt_regs *regs);
-extern void __sev_es_ist_exit(void);
-static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
-{
-       if (static_branch_unlikely(&sev_es_enable_key))
-               __sev_es_ist_enter(regs);
-}
-static __always_inline void sev_es_ist_exit(void)
-{
-       if (static_branch_unlikely(&sev_es_enable_key))
-               __sev_es_ist_exit();
-}
-extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
-extern void __sev_es_nmi_complete(void);
-static __always_inline void sev_es_nmi_complete(void)
-{
-       if (static_branch_unlikely(&sev_es_enable_key))
-               __sev_es_nmi_complete();
-}
-extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-#else
-static inline void sev_es_ist_enter(struct pt_regs *regs) { }
-static inline void sev_es_ist_exit(void) { }
-static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
-static inline void sev_es_nmi_complete(void) { }
-static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
-#endif
-
-#endif
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h

new file mode 100644 (file)

index 0000000..fa5cd05
--- /dev/null
+++ b/arch/x86/include/asm/sev.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#ifndef __ASM_ENCRYPTED_STATE_H
+#define __ASM_ENCRYPTED_STATE_H
+
+#include <linux/types.h>
+#include <asm/insn.h>
+#include <asm/sev-common.h>
+
+#define GHCB_PROTO_OUR         0x0001UL
+#define GHCB_PROTOCOL_MAX      1ULL
+#define GHCB_DEFAULT_USAGE     0ULL
+
+#define        VMGEXIT()                       { asm volatile("rep; vmmcall\n\r"); }
+
+enum es_result {
+       ES_OK,                  /* All good */
+       ES_UNSUPPORTED,         /* Requested operation not supported */
+       ES_VMM_ERROR,           /* Unexpected state from the VMM */
+       ES_DECODE_FAILED,       /* Instruction decoding failed */
+       ES_EXCEPTION,           /* Instruction caused exception */
+       ES_RETRY,               /* Retry instruction emulation */
+};
+
+struct es_fault_info {
+       unsigned long vector;
+       unsigned long error_code;
+       unsigned long cr2;
+};
+
+struct pt_regs;
+
+/* ES instruction emulation context */
+struct es_em_ctxt {
+       struct pt_regs *regs;
+       struct insn insn;
+       struct es_fault_info fi;
+};
+
+void do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code);
+
+static inline u64 lower_bits(u64 val, unsigned int bits)
+{
+       u64 mask = (1ULL << bits) - 1;
+
+       return (val & mask);
+}
+
+struct real_mode_header;
+enum stack_type;
+
+/* Early IDT entry points for #VC handler */
+extern void vc_no_ghcb(void);
+extern void vc_boot_ghcb(void);
+extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+extern struct static_key_false sev_es_enable_key;
+extern void __sev_es_ist_enter(struct pt_regs *regs);
+extern void __sev_es_ist_exit(void);
+static __always_inline void sev_es_ist_enter(struct pt_regs *regs)
+{
+       if (static_branch_unlikely(&sev_es_enable_key))
+               __sev_es_ist_enter(regs);
+}
+static __always_inline void sev_es_ist_exit(void)
+{
+       if (static_branch_unlikely(&sev_es_enable_key))
+               __sev_es_ist_exit();
+}
+extern int sev_es_setup_ap_jump_table(struct real_mode_header *rmh);
+extern void __sev_es_nmi_complete(void);
+static __always_inline void sev_es_nmi_complete(void)
+{
+       if (static_branch_unlikely(&sev_es_enable_key))
+               __sev_es_nmi_complete();
+}
+extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
+#else
+static inline void sev_es_ist_enter(struct pt_regs *regs) { }
+static inline void sev_es_ist_exit(void) { }
+static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; }
+static inline void sev_es_nmi_complete(void) { }
+static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; }
+#endif
+
+#endif
diff --git a/arch/x86/include/asm/vdso/clocksource.h b/arch/x86/include/asm/vdso/clocksource.h

index 119ac86..136e5e5 100644 (file)
--- a/arch/x86/include/asm/vdso/clocksource.h
+++ b/arch/x86/include/asm/vdso/clocksource.h
@@ -7,4 +7,6 @@
         VDSO_CLOCKMODE_PVCLOCK, \
         VDSO_CLOCKMODE_HVCLOCK
  
+#define HAVE_VDSO_CLOCKMODE_HVCLOCK
+
  #endif /* __ASM_VDSO_CLOCKSOURCE_H */
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h

index 5a3022c..0662f64 100644 (file)
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
                 __u16 flags;
         } smm;
  
+       __u16 pad;
+
         __u32 flags;
         __u64 preemption_timer_deadline;
  };
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile

index 0704c2a..0f66682 100644 (file)
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -20,7 +20,7 @@ CFLAGS_REMOVE_kvmclock.o = -pg
  CFLAGS_REMOVE_ftrace.o = -pg
  CFLAGS_REMOVE_early_printk.o = -pg
  CFLAGS_REMOVE_head64.o = -pg
-CFLAGS_REMOVE_sev-es.o = -pg
+CFLAGS_REMOVE_sev.o = -pg
  endif
  
  KASAN_SANITIZE_head$(BITS).o                           := n
@@ -28,7 +28,7 @@ KASAN_SANITIZE_dumpstack.o                            := n
  KASAN_SANITIZE_dumpstack_$(BITS).o                     := n
  KASAN_SANITIZE_stacktrace.o                            := n
  KASAN_SANITIZE_paravirt.o                              := n
-KASAN_SANITIZE_sev-es.o                                        := n
+KASAN_SANITIZE_sev.o                                   := n
  
  # With some compiler versions the generated code results in boot hangs, caused
  # by several compilation units. To be safe, disable all instrumentation.
@@ -148,7 +148,7 @@ obj-$(CONFIG_UNWINDER_ORC)          += unwind_orc.o
  obj-$(CONFIG_UNWINDER_FRAME_POINTER)   += unwind_frame.o
  obj-$(CONFIG_UNWINDER_GUESS)           += unwind_guess.o
  
-obj-$(CONFIG_AMD_MEM_ENCRYPT)          += sev-es.o
+obj-$(CONFIG_AMD_MEM_ENCRYPT)          += sev.o
  ###
  # 64 bit specific files
  ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c

index 2d11384..c06ac56 100644 (file)
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -593,8 +593,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c)
          */
         if (cpu_has(c, X86_FEATURE_SME) || cpu_has(c, X86_FEATURE_SEV)) {
                 /* Check if memory encryption is enabled */
-               rdmsrl(MSR_K8_SYSCFG, msr);
-               if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+               rdmsrl(MSR_AMD64_SYSCFG, msr);
+               if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
                         goto clear_all;
  
                 /*
@@ -1165,3 +1165,19 @@ void set_dr_addr_mask(unsigned long mask, int dr)
                 break;
         }
  }
+
+u32 amd_get_highest_perf(void)
+{
+       struct cpuinfo_x86 *c = &boot_cpu_data;
+
+       if (c->x86 == 0x17 && ((c->x86_model >= 0x30 && c->x86_model < 0x40) ||
+                              (c->x86_model >= 0x70 && c->x86_model < 0x80)))
+               return 166;
+
+       if (c->x86 == 0x19 && ((c->x86_model >= 0x20 && c->x86_model < 0x30) ||
+                              (c->x86_model >= 0x40 && c->x86_model < 0x70)))
+               return 166;
+
+       return 255;
+}
+EXPORT_SYMBOL_GPL(amd_get_highest_perf);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c

index 0c3b372..b5f4304 100644 (file)
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -836,7 +836,7 @@ int __init amd_special_default_mtrr(void)
         if (boot_cpu_data.x86 < 0xf)
                 return 0;
         /* In case some hypervisor doesn't pass SYSCFG through: */
-       if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
+       if (rdmsr_safe(MSR_AMD64_SYSCFG, &l, &h) < 0)
                 return 0;
         /*
          * Memory between 4GB and top of mem is forced WB by this magic bit.
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c

index b90f3f4..5581082 100644 (file)
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -53,13 +53,13 @@ static inline void k8_check_syscfg_dram_mod_en(void)
               (boot_cpu_data.x86 >= 0x0f)))
                 return;
  
-       rdmsr(MSR_K8_SYSCFG, lo, hi);
+       rdmsr(MSR_AMD64_SYSCFG, lo, hi);
         if (lo & K8_MTRRFIXRANGE_DRAM_MODIFY) {
                 pr_err(FW_WARN "MTRR: CPU %u: SYSCFG[MtrrFixDramModEn]"
                        " not cleared by BIOS, clearing this bit\n",
                        smp_processor_id());
                 lo &= ~K8_MTRRFIXRANGE_DRAM_MODIFY;
-               mtrr_wrmsr(MSR_K8_SYSCFG, lo, hi);
+               mtrr_wrmsr(MSR_AMD64_SYSCFG, lo, hi);
         }
  }
  
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c

index 18be441..de01903 100644 (file)
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -39,7 +39,7 @@
  #include <asm/realmode.h>
  #include <asm/extable.h>
  #include <asm/trapnr.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
  
  /*
   * Manage page tables very early on.
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c

index d307c22..a26643d 100644 (file)
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,6 +26,7 @@
  #include <linux/kprobes.h>
  #include <linux/nmi.h>
  #include <linux/swait.h>
+#include <linux/syscore_ops.h>
  #include <asm/timer.h>
  #include <asm/cpu.h>
  #include <asm/traps.h>
@@ -37,6 +38,7 @@
  #include <asm/tlb.h>
  #include <asm/cpuidle_haltpoll.h>
  #include <asm/ptrace.h>
+#include <asm/reboot.h>
  #include <asm/svm.h>
  
  DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
  
                 wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                 __this_cpu_write(apf_reason.enabled, 1);
-               pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+               pr_info("setup async PF for cpu %d\n", smp_processor_id());
         }
  
         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
         wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
         __this_cpu_write(apf_reason.enabled, 0);
  
-       pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+       pr_info("disable async PF for cpu %d\n", smp_processor_id());
  }
  
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
  {
-       /*
-        * We disable PV EOI before we load a new kernel by kexec,
-        * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
-        * New kernel can re-enable when it boots.
-        */
-       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-       kvm_pv_disable_apf();
-       kvm_disable_steal_time();
-}
+       if (!has_steal_clock)
+               return;
  
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-                               unsigned long code, void *unused)
-{
-       if (code == SYS_RESTART)
-               on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-       return NOTIFY_DONE;
+       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
  }
  
-static struct notifier_block kvm_pv_reboot_nb = {
-       .notifier_call = kvm_pv_reboot_notify,
-};
-
  static u64 kvm_steal_clock(int cpu)
  {
         u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
         return steal;
  }
  
-void kvm_disable_steal_time(void)
-{
-       if (!has_steal_clock)
-               return;
-
-       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
  static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
  {
         early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
         }
  }
  
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+       kvm_disable_steal_time();
+       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       kvm_pv_disable_apf();
+       if (!shutdown)
+               apf_task_wake_all();
+       kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       kvm_guest_cpu_init();
+       local_irq_restore(flags);
+       return 0;
+}
+
  #ifdef CONFIG_SMP
  
  static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
         kvm_spinlock_init();
  }
  
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
  {
-       kvm_disable_steal_time();
-       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-       kvm_pv_disable_apf();
-       apf_task_wake_all();
+       unsigned long flags;
+
+       local_irq_save(flags);
+       kvm_guest_cpu_offline(false);
+       local_irq_restore(flags);
+       return 0;
  }
  
-static int kvm_cpu_online(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
  {
-       local_irq_disable();
-       kvm_guest_cpu_init();
-       local_irq_enable();
+       kvm_guest_cpu_offline(false);
+
         return 0;
  }
  
-static int kvm_cpu_down_prepare(unsigned int cpu)
+static void kvm_resume(void)
  {
-       local_irq_disable();
-       kvm_guest_cpu_offline();
-       local_irq_enable();
-       return 0;
+       kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+       .suspend        = kvm_suspend,
+       .resume         = kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+       kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+                               unsigned long code, void *unused)
+{
+       if (code == SYS_RESTART)
+               on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+       return NOTIFY_DONE;
  }
  
+static struct notifier_block kvm_pv_reboot_nb = {
+       .notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shut down, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means a random memory location will keep being written to.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+       kvm_guest_cpu_offline(true);
+       native_machine_crash_shutdown(regs);
+}
  #endif
  
  static void __init kvm_guest_init(void)
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
         kvm_guest_cpu_init();
  #endif
  
+#ifdef CONFIG_KEXEC_CORE
+       machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+       register_syscore_ops(&kvm_syscore_ops);
+
         /*
          * Hard lockup detection is enabled by default. Disable it, as guests
          * can get false positives too easily, for example if the host is
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c

index d37ed4e..ad273e5 100644 (file)
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -20,7 +20,6 @@
  #include <asm/hypervisor.h>
  #include <asm/mem_encrypt.h>
  #include <asm/x86_init.h>
-#include <asm/reboot.h>
  #include <asm/kvmclock.h>
  
  static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
  }
  #endif
  
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-       native_write_msr(msr_kvm_system_time, 0, 0);
-       kvm_disable_steal_time();
-       native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
  {
         native_write_msr(msr_kvm_system_time, 0, 0);
-       kvm_disable_steal_time();
-       native_machine_shutdown();
  }
  
  static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
  #endif
         x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
         x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-       machine_ops.shutdown  = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
-       machine_ops.crash_shutdown  = kvm_crash_shutdown;
-#endif
         kvm_get_preset_lpj();
  
         /*
diff --git a/arch/x86/kernel/mmconf-fam10h_64.c b/arch/x86/kernel/mmconf-fam10h_64.c

index b5cb49e..c94dec6 100644 (file)
--- a/arch/x86/kernel/mmconf-fam10h_64.c
+++ b/arch/x86/kernel/mmconf-fam10h_64.c
@@ -95,7 +95,7 @@ static void get_fam10h_pci_mmconf_base(void)
                 return;
  
         /* SYS_CFG */
-       address = MSR_K8_SYSCFG;
+       address = MSR_AMD64_SYSCFG;
         rdmsrl(address, val);
  
         /* TOP_MEM2 is not enabled? */
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c

index 2ef961c..4bce802 100644 (file)
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -33,7 +33,7 @@
  #include <asm/reboot.h>
  #include <asm/cache.h>
  #include <asm/nospec-branch.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
  
  #define CREATE_TRACE_POINTS
  #include <trace/events/nmi.h>
diff --git a/arch/x86/kernel/sev-es-shared.c b/arch/x86/kernel/sev-es-shared.c

deleted file mode 100644 (file)

index 0aa9f13..0000000
--- a/arch/x86/kernel/sev-es-shared.c
+++ /dev/null
@@ -1,525 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * AMD Encrypted Register State Support
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- *
- * This file is not compiled stand-alone. It contains code shared
- * between the pre-decompression boot code and the running Linux kernel
- * and is included directly into both code-bases.
- */
-
-#ifndef __BOOT_COMPRESSED
-#define error(v)       pr_err(v)
-#define has_cpuflag(f) boot_cpu_has(f)
-#endif
-
-static bool __init sev_es_check_cpu_features(void)
-{
-       if (!has_cpuflag(X86_FEATURE_RDRAND)) {
-               error("RDRAND instruction not supported - no trusted source of randomness available\n");
-               return false;
-       }
-
-       return true;
-}
-
-static void __noreturn sev_es_terminate(unsigned int reason)
-{
-       u64 val = GHCB_SEV_TERMINATE;
-
-       /*
-        * Tell the hypervisor what went wrong - only reason-set 0 is
-        * currently supported.
-        */
-       val |= GHCB_SEV_TERMINATE_REASON(0, reason);
-
-       /* Request Guest Termination from Hypvervisor */
-       sev_es_wr_ghcb_msr(val);
-       VMGEXIT();
-
-       while (true)
-               asm volatile("hlt\n" : : : "memory");
-}
-
-static bool sev_es_negotiate_protocol(void)
-{
-       u64 val;
-
-       /* Do the GHCB protocol version negotiation */
-       sev_es_wr_ghcb_msr(GHCB_SEV_INFO_REQ);
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-
-       if (GHCB_INFO(val) != GHCB_SEV_INFO)
-               return false;
-
-       if (GHCB_PROTO_MAX(val) < GHCB_PROTO_OUR ||
-           GHCB_PROTO_MIN(val) > GHCB_PROTO_OUR)
-               return false;
-
-       return true;
-}
-
-static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
-{
-       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
-}
-
-static bool vc_decoding_needed(unsigned long exit_code)
-{
-       /* Exceptions don't require to decode the instruction */
-       return !(exit_code >= SVM_EXIT_EXCP_BASE &&
-                exit_code <= SVM_EXIT_LAST_EXCP);
-}
-
-static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
-                                     struct pt_regs *regs,
-                                     unsigned long exit_code)
-{
-       enum es_result ret = ES_OK;
-
-       memset(ctxt, 0, sizeof(*ctxt));
-       ctxt->regs = regs;
-
-       if (vc_decoding_needed(exit_code))
-               ret = vc_decode_insn(ctxt);
-
-       return ret;
-}
-
-static void vc_finish_insn(struct es_em_ctxt *ctxt)
-{
-       ctxt->regs->ip += ctxt->insn.length;
-}
-
-static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
-                                         struct es_em_ctxt *ctxt,
-                                         u64 exit_code, u64 exit_info_1,
-                                         u64 exit_info_2)
-{
-       enum es_result ret;
-
-       /* Fill in protocol and format specifiers */
-       ghcb->protocol_version = GHCB_PROTOCOL_MAX;
-       ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
-
-       ghcb_set_sw_exit_code(ghcb, exit_code);
-       ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
-       ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
-
-       sev_es_wr_ghcb_msr(__pa(ghcb));
-       VMGEXIT();
-
-       if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
-               u64 info = ghcb->save.sw_exit_info_2;
-               unsigned long v;
-
-               info = ghcb->save.sw_exit_info_2;
-               v = info & SVM_EVTINJ_VEC_MASK;
-
-               /* Check if exception information from hypervisor is sane. */
-               if ((info & SVM_EVTINJ_VALID) &&
-                   ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
-                   ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
-                       ctxt->fi.vector = v;
-                       if (info & SVM_EVTINJ_VALID_ERR)
-                               ctxt->fi.error_code = info >> 32;
-                       ret = ES_EXCEPTION;
-               } else {
-                       ret = ES_VMM_ERROR;
-               }
-       } else {
-               ret = ES_OK;
-       }
-
-       return ret;
-}
-
-/*
- * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
- * page yet, so it only supports the MSR based communication with the
- * hypervisor and only the CPUID exit-code.
- */
-void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
-{
-       unsigned int fn = lower_bits(regs->ax, 32);
-       unsigned long val;
-
-       /* Only CPUID is supported via MSR protocol */
-       if (exit_code != SVM_EXIT_CPUID)
-               goto fail;
-
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-               goto fail;
-       regs->ax = val >> 32;
-
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-               goto fail;
-       regs->bx = val >> 32;
-
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-               goto fail;
-       regs->cx = val >> 32;
-
-       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
-       VMGEXIT();
-       val = sev_es_rd_ghcb_msr();
-       if (GHCB_SEV_GHCB_RESP_CODE(val) != GHCB_SEV_CPUID_RESP)
-               goto fail;
-       regs->dx = val >> 32;
-
-       /*
-        * This is a VC handler and the #VC is only raised when SEV-ES is
-        * active, which means SEV must be active too. Do sanity checks on the
-        * CPUID results to make sure the hypervisor does not trick the kernel
-        * into the no-sev path. This could map sensitive data unencrypted and
-        * make it accessible to the hypervisor.
-        *
-        * In particular, check for:
-        *      - Availability of CPUID leaf 0x8000001f
-        *      - SEV CPUID bit.
-        *
-        * The hypervisor might still report the wrong C-bit position, but this
-        * can't be checked here.
-        */
-
-       if (fn == 0x80000000 && (regs->ax < 0x8000001f))
-               /* SEV leaf check */
-               goto fail;
-       else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
-               /* SEV bit */
-               goto fail;
-
-       /* Skip over the CPUID two-byte opcode */
-       regs->ip += 2;
-
-       return;
-
-fail:
-       /* Terminate the guest */
-       sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-}
-
-static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
-                                         void *src, char *buf,
-                                         unsigned int data_size,
-                                         unsigned int count,
-                                         bool backwards)
-{
-       int i, b = backwards ? -1 : 1;
-       enum es_result ret = ES_OK;
-
-       for (i = 0; i < count; i++) {
-               void *s = src + (i * data_size * b);
-               char *d = buf + (i * data_size);
-
-               ret = vc_read_mem(ctxt, s, d, data_size);
-               if (ret != ES_OK)
-                       break;
-       }
-
-       return ret;
-}
-
-static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
-                                          void *dst, char *buf,
-                                          unsigned int data_size,
-                                          unsigned int count,
-                                          bool backwards)
-{
-       int i, s = backwards ? -1 : 1;
-       enum es_result ret = ES_OK;
-
-       for (i = 0; i < count; i++) {
-               void *d = dst + (i * data_size * s);
-               char *b = buf + (i * data_size);
-
-               ret = vc_write_mem(ctxt, d, b, data_size);
-               if (ret != ES_OK)
-                       break;
-       }
-
-       return ret;
-}
-
-#define IOIO_TYPE_STR  BIT(2)
-#define IOIO_TYPE_IN   1
-#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
-#define IOIO_TYPE_OUT  0
-#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
-
-#define IOIO_REP       BIT(3)
-
-#define IOIO_ADDR_64   BIT(9)
-#define IOIO_ADDR_32   BIT(8)
-#define IOIO_ADDR_16   BIT(7)
-
-#define IOIO_DATA_32   BIT(6)
-#define IOIO_DATA_16   BIT(5)
-#define IOIO_DATA_8    BIT(4)
-
-#define IOIO_SEG_ES    (0 << 10)
-#define IOIO_SEG_DS    (3 << 10)
-
-static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
-{
-       struct insn *insn = &ctxt->insn;
-       *exitinfo = 0;
-
-       switch (insn->opcode.bytes[0]) {
-       /* INS opcodes */
-       case 0x6c:
-       case 0x6d:
-               *exitinfo |= IOIO_TYPE_INS;
-               *exitinfo |= IOIO_SEG_ES;
-               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-               break;
-
-       /* OUTS opcodes */
-       case 0x6e:
-       case 0x6f:
-               *exitinfo |= IOIO_TYPE_OUTS;
-               *exitinfo |= IOIO_SEG_DS;
-               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-               break;
-
-       /* IN immediate opcodes */
-       case 0xe4:
-       case 0xe5:
-               *exitinfo |= IOIO_TYPE_IN;
-               *exitinfo |= (u8)insn->immediate.value << 16;
-               break;
-
-       /* OUT immediate opcodes */
-       case 0xe6:
-       case 0xe7:
-               *exitinfo |= IOIO_TYPE_OUT;
-               *exitinfo |= (u8)insn->immediate.value << 16;
-               break;
-
-       /* IN register opcodes */
-       case 0xec:
-       case 0xed:
-               *exitinfo |= IOIO_TYPE_IN;
-               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-               break;
-
-       /* OUT register opcodes */
-       case 0xee:
-       case 0xef:
-               *exitinfo |= IOIO_TYPE_OUT;
-               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
-               break;
-
-       default:
-               return ES_DECODE_FAILED;
-       }
-
-       switch (insn->opcode.bytes[0]) {
-       case 0x6c:
-       case 0x6e:
-       case 0xe4:
-       case 0xe6:
-       case 0xec:
-       case 0xee:
-               /* Single byte opcodes */
-               *exitinfo |= IOIO_DATA_8;
-               break;
-       default:
-               /* Length determined by instruction parsing */
-               *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
-                                                    : IOIO_DATA_32;
-       }
-       switch (insn->addr_bytes) {
-       case 2:
-               *exitinfo |= IOIO_ADDR_16;
-               break;
-       case 4:
-               *exitinfo |= IOIO_ADDR_32;
-               break;
-       case 8:
-               *exitinfo |= IOIO_ADDR_64;
-               break;
-       }
-
-       if (insn_has_rep_prefix(insn))
-               *exitinfo |= IOIO_REP;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-       struct pt_regs *regs = ctxt->regs;
-       u64 exit_info_1, exit_info_2;
-       enum es_result ret;
-
-       ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
-       if (ret != ES_OK)
-               return ret;
-
-       if (exit_info_1 & IOIO_TYPE_STR) {
-
-               /* (REP) INS/OUTS */
-
-               bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
-               unsigned int io_bytes, exit_bytes;
-               unsigned int ghcb_count, op_count;
-               unsigned long es_base;
-               u64 sw_scratch;
-
-               /*
-                * For the string variants with rep prefix the amount of in/out
-                * operations per #VC exception is limited so that the kernel
-                * has a chance to take interrupts and re-schedule while the
-                * instruction is emulated.
-                */
-               io_bytes   = (exit_info_1 >> 4) & 0x7;
-               ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
-
-               op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
-               exit_info_2 = min(op_count, ghcb_count);
-               exit_bytes  = exit_info_2 * io_bytes;
-
-               es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
-               /* Read bytes of OUTS into the shared buffer */
-               if (!(exit_info_1 & IOIO_TYPE_IN)) {
-                       ret = vc_insn_string_read(ctxt,
-                                              (void *)(es_base + regs->si),
-                                              ghcb->shared_buffer, io_bytes,
-                                              exit_info_2, df);
-                       if (ret)
-                               return ret;
-               }
-
-               /*
-                * Issue an VMGEXIT to the HV to consume the bytes from the
-                * shared buffer or to have it write them into the shared buffer
-                * depending on the instruction: OUTS or INS.
-                */
-               sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
-               ghcb_set_sw_scratch(ghcb, sw_scratch);
-               ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
-                                         exit_info_1, exit_info_2);
-               if (ret != ES_OK)
-                       return ret;
-
-               /* Read bytes from shared buffer into the guest's destination. */
-               if (exit_info_1 & IOIO_TYPE_IN) {
-                       ret = vc_insn_string_write(ctxt,
-                                                  (void *)(es_base + regs->di),
-                                                  ghcb->shared_buffer, io_bytes,
-                                                  exit_info_2, df);
-                       if (ret)
-                               return ret;
-
-                       if (df)
-                               regs->di -= exit_bytes;
-                       else
-                               regs->di += exit_bytes;
-               } else {
-                       if (df)
-                               regs->si -= exit_bytes;
-                       else
-                               regs->si += exit_bytes;
-               }
-
-               if (exit_info_1 & IOIO_REP)
-                       regs->cx -= exit_info_2;
-
-               ret = regs->cx ? ES_RETRY : ES_OK;
-
-       } else {
-
-               /* IN/OUT into/from rAX */
-
-               int bits = (exit_info_1 & 0x70) >> 1;
-               u64 rax = 0;
-
-               if (!(exit_info_1 & IOIO_TYPE_IN))
-                       rax = lower_bits(regs->ax, bits);
-
-               ghcb_set_rax(ghcb, rax);
-
-               ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
-               if (ret != ES_OK)
-                       return ret;
-
-               if (exit_info_1 & IOIO_TYPE_IN) {
-                       if (!ghcb_rax_is_valid(ghcb))
-                               return ES_VMM_ERROR;
-                       regs->ax = lower_bits(ghcb->save.rax, bits);
-               }
-       }
-
-       return ret;
-}
-
-static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
-                                     struct es_em_ctxt *ctxt)
-{
-       struct pt_regs *regs = ctxt->regs;
-       u32 cr4 = native_read_cr4();
-       enum es_result ret;
-
-       ghcb_set_rax(ghcb, regs->ax);
-       ghcb_set_rcx(ghcb, regs->cx);
-
-       if (cr4 & X86_CR4_OSXSAVE)
-               /* Safe to read xcr0 */
-               ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
-       else
-               /* xgetbv will cause #GP - use reset value for xcr0 */
-               ghcb_set_xcr0(ghcb, 1);
-
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
-       if (ret != ES_OK)
-               return ret;
-
-       if (!(ghcb_rax_is_valid(ghcb) &&
-             ghcb_rbx_is_valid(ghcb) &&
-             ghcb_rcx_is_valid(ghcb) &&
-             ghcb_rdx_is_valid(ghcb)))
-               return ES_VMM_ERROR;
-
-       regs->ax = ghcb->save.rax;
-       regs->bx = ghcb->save.rbx;
-       regs->cx = ghcb->save.rcx;
-       regs->dx = ghcb->save.rdx;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
-                                     struct es_em_ctxt *ctxt,
-                                     unsigned long exit_code)
-{
-       bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
-       enum es_result ret;
-
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
-       if (ret != ES_OK)
-               return ret;
-
-       if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
-            (!rdtscp || ghcb_rcx_is_valid(ghcb))))
-               return ES_VMM_ERROR;
-
-       ctxt->regs->ax = ghcb->save.rax;
-       ctxt->regs->dx = ghcb->save.rdx;
-       if (rdtscp)
-               ctxt->regs->cx = ghcb->save.rcx;
-
-       return ES_OK;
-}
diff --git a/arch/x86/kernel/sev-es.c b/arch/x86/kernel/sev-es.c

deleted file mode 100644 (file)

index 73873b0..0000000
--- a/arch/x86/kernel/sev-es.c
+++ /dev/null
@@ -1,1461 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * AMD Memory Encryption Support
- *
- * Copyright (C) 2019 SUSE
- *
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt)    "SEV-ES: " fmt
-
-#include <linux/sched/debug.h> /* For show_regs() */
-#include <linux/percpu-defs.h>
-#include <linux/mem_encrypt.h>
-#include <linux/lockdep.h>
-#include <linux/printk.h>
-#include <linux/mm_types.h>
-#include <linux/set_memory.h>
-#include <linux/memblock.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-
-#include <asm/cpu_entry_area.h>
-#include <asm/stacktrace.h>
-#include <asm/sev-es.h>
-#include <asm/insn-eval.h>
-#include <asm/fpu/internal.h>
-#include <asm/processor.h>
-#include <asm/realmode.h>
-#include <asm/traps.h>
-#include <asm/svm.h>
-#include <asm/smp.h>
-#include <asm/cpu.h>
-
-#define DR7_RESET_VALUE        0x400
-
-/* For early boot hypervisor communication in SEV-ES enabled guests */
-static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
-
-/*
- * Needs to be in the .data section because we need it NULL before bss is
- * cleared
- */
-static struct ghcb __initdata *boot_ghcb;
-
-/* #VC handler runtime per-CPU data */
-struct sev_es_runtime_data {
-       struct ghcb ghcb_page;
-
-       /* Physical storage for the per-CPU IST stack of the #VC handler */
-       char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
-       /*
-        * Physical storage for the per-CPU fall-back stack of the #VC handler.
-        * The fall-back stack is used when it is not safe to switch back to the
-        * interrupted stack in the #VC entry code.
-        */
-       char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
-
-       /*
-        * Reserve one page per CPU as backup storage for the unencrypted GHCB.
-        * It is needed when an NMI happens while the #VC handler uses the real
-        * GHCB, and the NMI handler itself is causing another #VC exception. In
-        * that case the GHCB content of the first handler needs to be backed up
-        * and restored.
-        */
-       struct ghcb backup_ghcb;
-
-       /*
-        * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
-        * There is no need for it to be atomic, because nothing is written to
-        * the GHCB between the read and the write of ghcb_active. So it is safe
-        * to use it when a nested #VC exception happens before the write.
-        *
-        * This is necessary for example in the #VC->NMI->#VC case when the NMI
-        * happens while the first #VC handler uses the GHCB. When the NMI code
-        * raises a second #VC handler it might overwrite the contents of the
-        * GHCB written by the first handler. To avoid this the content of the
-        * GHCB is saved and restored when the GHCB is detected to be in use
-        * already.
-        */
-       bool ghcb_active;
-       bool backup_ghcb_active;
-
-       /*
-        * Cached DR7 value - write it on DR7 writes and return it on reads.
-        * That value will never make it to the real hardware DR7 as debugging
-        * is currently unsupported in SEV-ES guests.
-        */
-       unsigned long dr7;
-};
-
-struct ghcb_state {
-       struct ghcb *ghcb;
-};
-
-static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
-DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
-
-/* Needed in vc_early_forward_exception */
-void do_early_exception(struct pt_regs *regs, int trapnr);
-
-static void __init setup_vc_stacks(int cpu)
-{
-       struct sev_es_runtime_data *data;
-       struct cpu_entry_area *cea;
-       unsigned long vaddr;
-       phys_addr_t pa;
-
-       data = per_cpu(runtime_data, cpu);
-       cea  = get_cpu_entry_area(cpu);
-
-       /* Map #VC IST stack */
-       vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
-       pa    = __pa(data->ist_stack);
-       cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-
-       /* Map VC fall-back stack */
-       vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
-       pa    = __pa(data->fallback_stack);
-       cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
-}
-
-static __always_inline bool on_vc_stack(struct pt_regs *regs)
-{
-       unsigned long sp = regs->sp;
-
-       /* User-mode RSP is not trusted */
-       if (user_mode(regs))
-               return false;
-
-       /* SYSCALL gap still has user-mode RSP */
-       if (ip_within_syscall_gap(regs))
-               return false;
-
-       return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
-}
-
-/*
- * This function handles the case when an NMI is raised in the #VC
- * exception handler entry code, before the #VC handler has switched off
- * its IST stack. In this case, the IST entry for #VC must be adjusted,
- * so that any nested #VC exception will not overwrite the stack
- * contents of the interrupted #VC handler.
- *
- * The IST entry is adjusted unconditionally so that it can be also be
- * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
- * nested sev_es_ist_exit() call may adjust back the IST entry too
- * early.
- *
- * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
- * on the NMI IST stack, as they are only called from NMI handling code
- * right now.
- */
-void noinstr __sev_es_ist_enter(struct pt_regs *regs)
-{
-       unsigned long old_ist, new_ist;
-
-       /* Read old IST entry */
-       new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-       /*
-        * If NMI happened while on the #VC IST stack, set the new IST
-        * value below regs->sp, so that the interrupted stack frame is
-        * not overwritten by subsequent #VC exceptions.
-        */
-       if (on_vc_stack(regs))
-               new_ist = regs->sp;
-
-       /*
-        * Reserve additional 8 bytes and store old IST value so this
-        * adjustment can be unrolled in __sev_es_ist_exit().
-        */
-       new_ist -= sizeof(old_ist);
-       *(unsigned long *)new_ist = old_ist;
-
-       /* Set new IST entry */
-       this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
-}
-
-void noinstr __sev_es_ist_exit(void)
-{
-       unsigned long ist;
-
-       /* Read IST entry */
-       ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
-
-       if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
-               return;
-
-       /* Read back old IST entry and write it to the TSS */
-       this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
-}
-
-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
-{
-       struct sev_es_runtime_data *data;
-       struct ghcb *ghcb;
-
-       data = this_cpu_read(runtime_data);
-       ghcb = &data->ghcb_page;
-
-       if (unlikely(data->ghcb_active)) {
-               /* GHCB is already in use - save its contents */
-
-               if (unlikely(data->backup_ghcb_active))
-                       return NULL;
-
-               /* Mark backup_ghcb active before writing to it */
-               data->backup_ghcb_active = true;
-
-               state->ghcb = &data->backup_ghcb;
-
-               /* Backup GHCB content */
-               *state->ghcb = *ghcb;
-       } else {
-               state->ghcb = NULL;
-               data->ghcb_active = true;
-       }
-
-       return ghcb;
-}
-
-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
-{
-       struct sev_es_runtime_data *data;
-       struct ghcb *ghcb;
-
-       data = this_cpu_read(runtime_data);
-       ghcb = &data->ghcb_page;
-
-       if (state->ghcb) {
-               /* Restore GHCB from Backup */
-               *ghcb = *state->ghcb;
-               data->backup_ghcb_active = false;
-               state->ghcb = NULL;
-       } else {
-               data->ghcb_active = false;
-       }
-}
-
-/* Needed in vc_early_forward_exception */
-void do_early_exception(struct pt_regs *regs, int trapnr);
-
-static inline u64 sev_es_rd_ghcb_msr(void)
-{
-       return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
-}
-
-static __always_inline void sev_es_wr_ghcb_msr(u64 val)
-{
-       u32 low, high;
-
-       low  = (u32)(val);
-       high = (u32)(val >> 32);
-
-       native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
-}
-
-static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
-                               unsigned char *buffer)
-{
-       return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
-}
-
-static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
-{
-       char buffer[MAX_INSN_SIZE];
-       int res;
-
-       res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-       if (!res) {
-               ctxt->fi.vector     = X86_TRAP_PF;
-               ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
-               ctxt->fi.cr2        = ctxt->regs->ip;
-               return ES_EXCEPTION;
-       }
-
-       if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
-               return ES_DECODE_FAILED;
-
-       if (ctxt->insn.immediate.got)
-               return ES_OK;
-       else
-               return ES_DECODE_FAILED;
-}
-
-static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
-{
-       char buffer[MAX_INSN_SIZE];
-       int res, ret;
-
-       res = vc_fetch_insn_kernel(ctxt, buffer);
-       if (res) {
-               ctxt->fi.vector     = X86_TRAP_PF;
-               ctxt->fi.error_code = X86_PF_INSTR;
-               ctxt->fi.cr2        = ctxt->regs->ip;
-               return ES_EXCEPTION;
-       }
-
-       ret = insn_decode(&ctxt->insn, buffer, MAX_INSN_SIZE, INSN_MODE_64);
-       if (ret < 0)
-               return ES_DECODE_FAILED;
-       else
-               return ES_OK;
-}
-
-static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
-{
-       if (user_mode(ctxt->regs))
-               return __vc_decode_user_insn(ctxt);
-       else
-               return __vc_decode_kern_insn(ctxt);
-}
-
-static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
-                                  char *dst, char *buf, size_t size)
-{
-       unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
-       char __user *target = (char __user *)dst;
-       u64 d8;
-       u32 d4;
-       u16 d2;
-       u8  d1;
-
-       /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-       if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
-               memcpy(dst, buf, size);
-               return ES_OK;
-       }
-
-       switch (size) {
-       case 1:
-               memcpy(&d1, buf, 1);
-               if (put_user(d1, target))
-                       goto fault;
-               break;
-       case 2:
-               memcpy(&d2, buf, 2);
-               if (put_user(d2, target))
-                       goto fault;
-               break;
-       case 4:
-               memcpy(&d4, buf, 4);
-               if (put_user(d4, target))
-                       goto fault;
-               break;
-       case 8:
-               memcpy(&d8, buf, 8);
-               if (put_user(d8, target))
-                       goto fault;
-               break;
-       default:
-               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
-               return ES_UNSUPPORTED;
-       }
-
-       return ES_OK;
-
-fault:
-       if (user_mode(ctxt->regs))
-               error_code |= X86_PF_USER;
-
-       ctxt->fi.vector = X86_TRAP_PF;
-       ctxt->fi.error_code = error_code;
-       ctxt->fi.cr2 = (unsigned long)dst;
-
-       return ES_EXCEPTION;
-}
-
-static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
-                                 char *src, char *buf, size_t size)
-{
-       unsigned long error_code = X86_PF_PROT;
-       char __user *s = (char __user *)src;
-       u64 d8;
-       u32 d4;
-       u16 d2;
-       u8  d1;
-
-       /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
-       if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
-               memcpy(buf, src, size);
-               return ES_OK;
-       }
-
-       switch (size) {
-       case 1:
-               if (get_user(d1, s))
-                       goto fault;
-               memcpy(buf, &d1, 1);
-               break;
-       case 2:
-               if (get_user(d2, s))
-                       goto fault;
-               memcpy(buf, &d2, 2);
-               break;
-       case 4:
-               if (get_user(d4, s))
-                       goto fault;
-               memcpy(buf, &d4, 4);
-               break;
-       case 8:
-               if (get_user(d8, s))
-                       goto fault;
-               memcpy(buf, &d8, 8);
-               break;
-       default:
-               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
-               return ES_UNSUPPORTED;
-       }
-
-       return ES_OK;
-
-fault:
-       if (user_mode(ctxt->regs))
-               error_code |= X86_PF_USER;
-
-       ctxt->fi.vector = X86_TRAP_PF;
-       ctxt->fi.error_code = error_code;
-       ctxt->fi.cr2 = (unsigned long)src;
-
-       return ES_EXCEPTION;
-}
-
-static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
-                                          unsigned long vaddr, phys_addr_t *paddr)
-{
-       unsigned long va = (unsigned long)vaddr;
-       unsigned int level;
-       phys_addr_t pa;
-       pgd_t *pgd;
-       pte_t *pte;
-
-       pgd = __va(read_cr3_pa());
-       pgd = &pgd[pgd_index(va)];
-       pte = lookup_address_in_pgd(pgd, va, &level);
-       if (!pte) {
-               ctxt->fi.vector     = X86_TRAP_PF;
-               ctxt->fi.cr2        = vaddr;
-               ctxt->fi.error_code = 0;
-
-               if (user_mode(ctxt->regs))
-                       ctxt->fi.error_code |= X86_PF_USER;
-
-               return ES_EXCEPTION;
-       }
-
-       if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
-               /* Emulated MMIO to/from encrypted memory not supported */
-               return ES_UNSUPPORTED;
-
-       pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
-       pa |= va & ~page_level_mask(level);
-
-       *paddr = pa;
-
-       return ES_OK;
-}
-
-/* Include code shared with pre-decompression boot stage */
-#include "sev-es-shared.c"
-
-void noinstr __sev_es_nmi_complete(void)
-{
-       struct ghcb_state state;
-       struct ghcb *ghcb;
-
-       ghcb = sev_es_get_ghcb(&state);
-
-       vc_ghcb_invalidate(ghcb);
-       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
-       ghcb_set_sw_exit_info_1(ghcb, 0);
-       ghcb_set_sw_exit_info_2(ghcb, 0);
-
-       sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
-       VMGEXIT();
-
-       sev_es_put_ghcb(&state);
-}
-
-static u64 get_jump_table_addr(void)
-{
-       struct ghcb_state state;
-       unsigned long flags;
-       struct ghcb *ghcb;
-       u64 ret = 0;
-
-       local_irq_save(flags);
-
-       ghcb = sev_es_get_ghcb(&state);
-
-       vc_ghcb_invalidate(ghcb);
-       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
-       ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
-       ghcb_set_sw_exit_info_2(ghcb, 0);
-
-       sev_es_wr_ghcb_msr(__pa(ghcb));
-       VMGEXIT();
-
-       if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
-           ghcb_sw_exit_info_2_is_valid(ghcb))
-               ret = ghcb->save.sw_exit_info_2;
-
-       sev_es_put_ghcb(&state);
-
-       local_irq_restore(flags);
-
-       return ret;
-}
-
-int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
-{
-       u16 startup_cs, startup_ip;
-       phys_addr_t jump_table_pa;
-       u64 jump_table_addr;
-       u16 __iomem *jump_table;
-
-       jump_table_addr = get_jump_table_addr();
-
-       /* On UP guests there is no jump table so this is not a failure */
-       if (!jump_table_addr)
-               return 0;
-
-       /* Check if AP Jump Table is page-aligned */
-       if (jump_table_addr & ~PAGE_MASK)
-               return -EINVAL;
-
-       jump_table_pa = jump_table_addr & PAGE_MASK;
-
-       startup_cs = (u16)(rmh->trampoline_start >> 4);
-       startup_ip = (u16)(rmh->sev_es_trampoline_start -
-                          rmh->trampoline_start);
-
-       jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
-       if (!jump_table)
-               return -EIO;
-
-       writew(startup_ip, &jump_table[0]);
-       writew(startup_cs, &jump_table[1]);
-
-       iounmap(jump_table);
-
-       return 0;
-}
-
-/*
- * This is needed by the OVMF UEFI firmware which will use whatever it finds in
- * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
- * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
- */
-int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
-{
-       struct sev_es_runtime_data *data;
-       unsigned long address, pflags;
-       int cpu;
-       u64 pfn;
-
-       if (!sev_es_active())
-               return 0;
-
-       pflags = _PAGE_NX | _PAGE_RW;
-
-       for_each_possible_cpu(cpu) {
-               data = per_cpu(runtime_data, cpu);
-
-               address = __pa(&data->ghcb_page);
-               pfn = address >> PAGE_SHIFT;
-
-               if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
-                       return 1;
-       }
-
-       return 0;
-}
-
-static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-       struct pt_regs *regs = ctxt->regs;
-       enum es_result ret;
-       u64 exit_info_1;
-
-       /* Is it a WRMSR? */
-       exit_info_1 = (ctxt->insn.opcode.bytes[1] == 0x30) ? 1 : 0;
-
-       ghcb_set_rcx(ghcb, regs->cx);
-       if (exit_info_1) {
-               ghcb_set_rax(ghcb, regs->ax);
-               ghcb_set_rdx(ghcb, regs->dx);
-       }
-
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
-
-       if ((ret == ES_OK) && (!exit_info_1)) {
-               regs->ax = ghcb->save.rax;
-               regs->dx = ghcb->save.rdx;
-       }
-
-       return ret;
-}
-
-/*
- * This function runs on the first #VC exception after the kernel
- * switched to virtual addresses.
- */
-static bool __init sev_es_setup_ghcb(void)
-{
-       /* First make sure the hypervisor talks a supported protocol. */
-       if (!sev_es_negotiate_protocol())
-               return false;
-
-       /*
-        * Clear the boot_ghcb. The first exception comes in before the bss
-        * section is cleared.
-        */
-       memset(&boot_ghcb_page, 0, PAGE_SIZE);
-
-       /* Alright - Make the boot-ghcb public */
-       boot_ghcb = &boot_ghcb_page;
-
-       return true;
-}
-
-#ifdef CONFIG_HOTPLUG_CPU
-static void sev_es_ap_hlt_loop(void)
-{
-       struct ghcb_state state;
-       struct ghcb *ghcb;
-
-       ghcb = sev_es_get_ghcb(&state);
-
-       while (true) {
-               vc_ghcb_invalidate(ghcb);
-               ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
-               ghcb_set_sw_exit_info_1(ghcb, 0);
-               ghcb_set_sw_exit_info_2(ghcb, 0);
-
-               sev_es_wr_ghcb_msr(__pa(ghcb));
-               VMGEXIT();
-
-               /* Wakeup signal? */
-               if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
-                   ghcb->save.sw_exit_info_2)
-                       break;
-       }
-
-       sev_es_put_ghcb(&state);
-}
-
-/*
- * Play_dead handler when running under SEV-ES. This is needed because
- * the hypervisor can't deliver an SIPI request to restart the AP.
- * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
- * hypervisor wakes it up again.
- */
-static void sev_es_play_dead(void)
-{
-       play_dead_common();
-
-       /* IRQs now disabled */
-
-       sev_es_ap_hlt_loop();
-
-       /*
-        * If we get here, the VCPU was woken up again. Jump to CPU
-        * startup code to get it back online.
-        */
-       start_cpu0();
-}
-#else  /* CONFIG_HOTPLUG_CPU */
-#define sev_es_play_dead       native_play_dead
-#endif /* CONFIG_HOTPLUG_CPU */
-
-#ifdef CONFIG_SMP
-static void __init sev_es_setup_play_dead(void)
-{
-       smp_ops.play_dead = sev_es_play_dead;
-}
-#else
-static inline void sev_es_setup_play_dead(void) { }
-#endif
-
-static void __init alloc_runtime_data(int cpu)
-{
-       struct sev_es_runtime_data *data;
-
-       data = memblock_alloc(sizeof(*data), PAGE_SIZE);
-       if (!data)
-               panic("Can't allocate SEV-ES runtime data");
-
-       per_cpu(runtime_data, cpu) = data;
-}
-
-static void __init init_ghcb(int cpu)
-{
-       struct sev_es_runtime_data *data;
-       int err;
-
-       data = per_cpu(runtime_data, cpu);
-
-       err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
-                                        sizeof(data->ghcb_page));
-       if (err)
-               panic("Can't map GHCBs unencrypted");
-
-       memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
-
-       data->ghcb_active = false;
-       data->backup_ghcb_active = false;
-}
-
-void __init sev_es_init_vc_handling(void)
-{
-       int cpu;
-
-       BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
-
-       if (!sev_es_active())
-               return;
-
-       if (!sev_es_check_cpu_features())
-               panic("SEV-ES CPU Features missing");
-
-       /* Enable SEV-ES special handling */
-       static_branch_enable(&sev_es_enable_key);
-
-       /* Initialize per-cpu GHCB pages */
-       for_each_possible_cpu(cpu) {
-               alloc_runtime_data(cpu);
-               init_ghcb(cpu);
-               setup_vc_stacks(cpu);
-       }
-
-       sev_es_setup_play_dead();
-
-       /* Secondary CPUs use the runtime #VC handler */
-       initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
-}
-
-static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
-{
-       int trapnr = ctxt->fi.vector;
-
-       if (trapnr == X86_TRAP_PF)
-               native_write_cr2(ctxt->fi.cr2);
-
-       ctxt->regs->orig_ax = ctxt->fi.error_code;
-       do_early_exception(ctxt->regs, trapnr);
-}
-
-static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
-{
-       long *reg_array;
-       int offset;
-
-       reg_array = (long *)ctxt->regs;
-       offset    = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
-
-       if (offset < 0)
-               return NULL;
-
-       offset /= sizeof(long);
-
-       return reg_array + offset;
-}
-
-static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
-{
-       long *reg_array;
-       int offset;
-
-       reg_array = (long *)ctxt->regs;
-       offset    = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
-
-       if (offset < 0)
-               return NULL;
-
-       offset /= sizeof(long);
-
-       return reg_array + offset;
-}
-static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
-                                unsigned int bytes, bool read)
-{
-       u64 exit_code, exit_info_1, exit_info_2;
-       unsigned long ghcb_pa = __pa(ghcb);
-       enum es_result res;
-       phys_addr_t paddr;
-       void __user *ref;
-
-       ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
-       if (ref == (void __user *)-1L)
-               return ES_UNSUPPORTED;
-
-       exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
-
-       res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
-       if (res != ES_OK) {
-               if (res == ES_EXCEPTION && !read)
-                       ctxt->fi.error_code |= X86_PF_WRITE;
-
-               return res;
-       }
-
-       exit_info_1 = paddr;
-       /* Can never be greater than 8 */
-       exit_info_2 = bytes;
-
-       ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
-
-       return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
-}
-
-static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
-                                                struct es_em_ctxt *ctxt)
-{
-       struct insn *insn = &ctxt->insn;
-       unsigned int bytes = 0;
-       enum es_result ret;
-       int sign_byte;
-       long *reg_data;
-
-       switch (insn->opcode.bytes[1]) {
-               /* MMIO Read w/ zero-extension */
-       case 0xb6:
-               bytes = 1;
-               fallthrough;
-       case 0xb7:
-               if (!bytes)
-                       bytes = 2;
-
-               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-               if (ret)
-                       break;
-
-               /* Zero extend based on operand size */
-               reg_data = vc_insn_get_reg(ctxt);
-               if (!reg_data)
-                       return ES_DECODE_FAILED;
-
-               memset(reg_data, 0, insn->opnd_bytes);
-
-               memcpy(reg_data, ghcb->shared_buffer, bytes);
-               break;
-
-               /* MMIO Read w/ sign-extension */
-       case 0xbe:
-               bytes = 1;
-               fallthrough;
-       case 0xbf:
-               if (!bytes)
-                       bytes = 2;
-
-               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-               if (ret)
-                       break;
-
-               /* Sign extend based on operand size */
-               reg_data = vc_insn_get_reg(ctxt);
-               if (!reg_data)
-                       return ES_DECODE_FAILED;
-
-               if (bytes == 1) {
-                       u8 *val = (u8 *)ghcb->shared_buffer;
-
-                       sign_byte = (*val & 0x80) ? 0xff : 0x00;
-               } else {
-                       u16 *val = (u16 *)ghcb->shared_buffer;
-
-                       sign_byte = (*val & 0x8000) ? 0xff : 0x00;
-               }
-               memset(reg_data, sign_byte, insn->opnd_bytes);
-
-               memcpy(reg_data, ghcb->shared_buffer, bytes);
-               break;
-
-       default:
-               ret = ES_UNSUPPORTED;
-       }
-
-       return ret;
-}
-
-/*
- * The MOVS instruction has two memory operands, which raises the
- * problem that it is not known whether the access to the source or the
- * destination caused the #VC exception (and hence whether an MMIO read
- * or write operation needs to be emulated).
- *
- * Instead of playing games with walking page-tables and trying to guess
- * whether the source or destination is an MMIO range, split the move
- * into two operations, a read and a write with only one memory operand.
- * This will cause a nested #VC exception on the MMIO address which can
- * then be handled.
- *
- * This implementation has the benefit that it also supports MOVS where
- * source _and_ destination are MMIO regions.
- *
- * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
- * rare operation. If it turns out to be a performance problem the split
- * operations can be moved to memcpy_fromio() and memcpy_toio().
- */
-static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
-                                         unsigned int bytes)
-{
-       unsigned long ds_base, es_base;
-       unsigned char *src, *dst;
-       unsigned char buffer[8];
-       enum es_result ret;
-       bool rep;
-       int off;
-
-       ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
-       es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
-
-       if (ds_base == -1L || es_base == -1L) {
-               ctxt->fi.vector = X86_TRAP_GP;
-               ctxt->fi.error_code = 0;
-               return ES_EXCEPTION;
-       }
-
-       src = ds_base + (unsigned char *)ctxt->regs->si;
-       dst = es_base + (unsigned char *)ctxt->regs->di;
-
-       ret = vc_read_mem(ctxt, src, buffer, bytes);
-       if (ret != ES_OK)
-               return ret;
-
-       ret = vc_write_mem(ctxt, dst, buffer, bytes);
-       if (ret != ES_OK)
-               return ret;
-
-       if (ctxt->regs->flags & X86_EFLAGS_DF)
-               off = -bytes;
-       else
-               off =  bytes;
-
-       ctxt->regs->si += off;
-       ctxt->regs->di += off;
-
-       rep = insn_has_rep_prefix(&ctxt->insn);
-       if (rep)
-               ctxt->regs->cx -= 1;
-
-       if (!rep || ctxt->regs->cx == 0)
-               return ES_OK;
-       else
-               return ES_RETRY;
-}
-
-static enum es_result vc_handle_mmio(struct ghcb *ghcb,
-                                    struct es_em_ctxt *ctxt)
-{
-       struct insn *insn = &ctxt->insn;
-       unsigned int bytes = 0;
-       enum es_result ret;
-       long *reg_data;
-
-       switch (insn->opcode.bytes[0]) {
-       /* MMIO Write */
-       case 0x88:
-               bytes = 1;
-               fallthrough;
-       case 0x89:
-               if (!bytes)
-                       bytes = insn->opnd_bytes;
-
-               reg_data = vc_insn_get_reg(ctxt);
-               if (!reg_data)
-                       return ES_DECODE_FAILED;
-
-               memcpy(ghcb->shared_buffer, reg_data, bytes);
-
-               ret = vc_do_mmio(ghcb, ctxt, bytes, false);
-               break;
-
-       case 0xc6:
-               bytes = 1;
-               fallthrough;
-       case 0xc7:
-               if (!bytes)
-                       bytes = insn->opnd_bytes;
-
-               memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
-
-               ret = vc_do_mmio(ghcb, ctxt, bytes, false);
-               break;
-
-               /* MMIO Read */
-       case 0x8a:
-               bytes = 1;
-               fallthrough;
-       case 0x8b:
-               if (!bytes)
-                       bytes = insn->opnd_bytes;
-
-               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
-               if (ret)
-                       break;
-
-               reg_data = vc_insn_get_reg(ctxt);
-               if (!reg_data)
-                       return ES_DECODE_FAILED;
-
-               /* Zero-extend for 32-bit operation */
-               if (bytes == 4)
-                       *reg_data = 0;
-
-               memcpy(reg_data, ghcb->shared_buffer, bytes);
-               break;
-
-               /* MOVS instruction */
-       case 0xa4:
-               bytes = 1;
-               fallthrough;
-       case 0xa5:
-               if (!bytes)
-                       bytes = insn->opnd_bytes;
-
-               ret = vc_handle_mmio_movs(ctxt, bytes);
-               break;
-               /* Two-Byte Opcodes */
-       case 0x0f:
-               ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
-               break;
-       default:
-               ret = ES_UNSUPPORTED;
-       }
-
-       return ret;
-}
-
-static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
-                                         struct es_em_ctxt *ctxt)
-{
-       struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-       long val, *reg = vc_insn_get_rm(ctxt);
-       enum es_result ret;
-
-       if (!reg)
-               return ES_DECODE_FAILED;
-
-       val = *reg;
-
-       /* Upper 32 bits must be written as zeroes */
-       if (val >> 32) {
-               ctxt->fi.vector = X86_TRAP_GP;
-               ctxt->fi.error_code = 0;
-               return ES_EXCEPTION;
-       }
-
-       /* Clear out other reserved bits and set bit 10 */
-       val = (val & 0xffff23ffL) | BIT(10);
-
-       /* Early non-zero writes to DR7 are not supported */
-       if (!data && (val & ~DR7_RESET_VALUE))
-               return ES_UNSUPPORTED;
-
-       /* Using a value of 0 for ExitInfo1 means RAX holds the value */
-       ghcb_set_rax(ghcb, val);
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
-       if (ret != ES_OK)
-               return ret;
-
-       if (data)
-               data->dr7 = val;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
-                                        struct es_em_ctxt *ctxt)
-{
-       struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-       long *reg = vc_insn_get_rm(ctxt);
-
-       if (!reg)
-               return ES_DECODE_FAILED;
-
-       if (data)
-               *reg = data->dr7;
-       else
-               *reg = DR7_RESET_VALUE;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
-                                      struct es_em_ctxt *ctxt)
-{
-       return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
-}
-
-static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
-{
-       enum es_result ret;
-
-       ghcb_set_rcx(ghcb, ctxt->regs->cx);
-
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
-       if (ret != ES_OK)
-               return ret;
-
-       if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb)))
-               return ES_VMM_ERROR;
-
-       ctxt->regs->ax = ghcb->save.rax;
-       ctxt->regs->dx = ghcb->save.rdx;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_monitor(struct ghcb *ghcb,
-                                       struct es_em_ctxt *ctxt)
-{
-       /*
-        * Treat it as a NOP and do not leak a physical address to the
-        * hypervisor.
-        */
-       return ES_OK;
-}
-
-static enum es_result vc_handle_mwait(struct ghcb *ghcb,
-                                     struct es_em_ctxt *ctxt)
-{
-       /* Treat the same as MONITOR/MONITORX */
-       return ES_OK;
-}
-
-static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
-                                       struct es_em_ctxt *ctxt)
-{
-       enum es_result ret;
-
-       ghcb_set_rax(ghcb, ctxt->regs->ax);
-       ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
-
-       if (x86_platform.hyper.sev_es_hcall_prepare)
-               x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
-
-       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
-       if (ret != ES_OK)
-               return ret;
-
-       if (!ghcb_rax_is_valid(ghcb))
-               return ES_VMM_ERROR;
-
-       ctxt->regs->ax = ghcb->save.rax;
-
-       /*
-        * Call sev_es_hcall_finish() after regs->ax is already set.
-        * This allows the hypervisor handler to overwrite it again if
-        * necessary.
-        */
-       if (x86_platform.hyper.sev_es_hcall_finish &&
-           !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
-               return ES_VMM_ERROR;
-
-       return ES_OK;
-}
-
-static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
-                                       struct es_em_ctxt *ctxt)
-{
-       /*
-        * Calling ecx_alignment_check() directly does not work, because it
-        * enables IRQs and the GHCB is active. Forward the exception and call
-        * it later from vc_forward_exception().
-        */
-       ctxt->fi.vector = X86_TRAP_AC;
-       ctxt->fi.error_code = 0;
-       return ES_EXCEPTION;
-}
-
-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-       if (user_mode(regs))
-               noist_exc_debug(regs);
-       else
-               exc_debug(regs);
-}
-
-static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
-                                        struct ghcb *ghcb,
-                                        unsigned long exit_code)
-{
-       enum es_result result;
-
-       switch (exit_code) {
-       case SVM_EXIT_READ_DR7:
-               result = vc_handle_dr7_read(ghcb, ctxt);
-               break;
-       case SVM_EXIT_WRITE_DR7:
-               result = vc_handle_dr7_write(ghcb, ctxt);
-               break;
-       case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
-               result = vc_handle_trap_ac(ghcb, ctxt);
-               break;
-       case SVM_EXIT_RDTSC:
-       case SVM_EXIT_RDTSCP:
-               result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
-               break;
-       case SVM_EXIT_RDPMC:
-               result = vc_handle_rdpmc(ghcb, ctxt);
-               break;
-       case SVM_EXIT_INVD:
-               pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
-               result = ES_UNSUPPORTED;
-               break;
-       case SVM_EXIT_CPUID:
-               result = vc_handle_cpuid(ghcb, ctxt);
-               break;
-       case SVM_EXIT_IOIO:
-               result = vc_handle_ioio(ghcb, ctxt);
-               break;
-       case SVM_EXIT_MSR:
-               result = vc_handle_msr(ghcb, ctxt);
-               break;
-       case SVM_EXIT_VMMCALL:
-               result = vc_handle_vmmcall(ghcb, ctxt);
-               break;
-       case SVM_EXIT_WBINVD:
-               result = vc_handle_wbinvd(ghcb, ctxt);
-               break;
-       case SVM_EXIT_MONITOR:
-               result = vc_handle_monitor(ghcb, ctxt);
-               break;
-       case SVM_EXIT_MWAIT:
-               result = vc_handle_mwait(ghcb, ctxt);
-               break;
-       case SVM_EXIT_NPF:
-               result = vc_handle_mmio(ghcb, ctxt);
-               break;
-       default:
-               /*
-                * Unexpected #VC exception
-                */
-               result = ES_UNSUPPORTED;
-       }
-
-       return result;
-}
-
-static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
-{
-       long error_code = ctxt->fi.error_code;
-       int trapnr = ctxt->fi.vector;
-
-       ctxt->regs->orig_ax = ctxt->fi.error_code;
-
-       switch (trapnr) {
-       case X86_TRAP_GP:
-               exc_general_protection(ctxt->regs, error_code);
-               break;
-       case X86_TRAP_UD:
-               exc_invalid_op(ctxt->regs);
-               break;
-       case X86_TRAP_AC:
-               exc_alignment_check(ctxt->regs, error_code);
-               break;
-       default:
-               pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
-               BUG();
-       }
-}
-
-static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
-{
-       unsigned long sp = (unsigned long)regs;
-
-       return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
-}
-
-/*
- * Main #VC exception handler. It is called when the entry code was able to
- * switch off the IST to a safe kernel stack.
- *
- * With the current implementation it is always possible to switch to a safe
- * stack because #VC exceptions only happen at known places, like intercepted
- * instructions or accesses to MMIO areas/IO ports. They can also happen with
- * code instrumentation when the hypervisor intercepts #DB, but the critical
- * paths are forbidden to be instrumented, so #DB exceptions currently also
- * only happen in safe places.
- */
-DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
-{
-       struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
-       irqentry_state_t irq_state;
-       struct ghcb_state state;
-       struct es_em_ctxt ctxt;
-       enum es_result result;
-       struct ghcb *ghcb;
-
-       /*
-        * Handle #DB before calling into !noinstr code to avoid recursive #DB.
-        */
-       if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
-               vc_handle_trap_db(regs);
-               return;
-       }
-
-       irq_state = irqentry_nmi_enter(regs);
-       lockdep_assert_irqs_disabled();
-       instrumentation_begin();
-
-       /*
-        * This is invoked through an interrupt gate, so IRQs are disabled. The
-        * code below might walk page-tables for user or kernel addresses, so
-        * keep the IRQs disabled to protect us against concurrent TLB flushes.
-        */
-
-       ghcb = sev_es_get_ghcb(&state);
-       if (!ghcb) {
-               /*
-                * Mark GHCBs inactive so that panic() is able to print the
-                * message.
-                */
-               data->ghcb_active        = false;
-               data->backup_ghcb_active = false;
-
-               panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
-       }
-
-       vc_ghcb_invalidate(ghcb);
-       result = vc_init_em_ctxt(&ctxt, regs, error_code);
-
-       if (result == ES_OK)
-               result = vc_handle_exitcode(&ctxt, ghcb, error_code);
-
-       sev_es_put_ghcb(&state);
-
-       /* Done - now check the result */
-       switch (result) {
-       case ES_OK:
-               vc_finish_insn(&ctxt);
-               break;
-       case ES_UNSUPPORTED:
-               pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
-                                  error_code, regs->ip);
-               goto fail;
-       case ES_VMM_ERROR:
-               pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
-                                  error_code, regs->ip);
-               goto fail;
-       case ES_DECODE_FAILED:
-               pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
-                                  error_code, regs->ip);
-               goto fail;
-       case ES_EXCEPTION:
-               vc_forward_exception(&ctxt);
-               break;
-       case ES_RETRY:
-               /* Nothing to do */
-               break;
-       default:
-               pr_emerg("Unknown result in %s():%d\n", __func__, result);
-               /*
-                * Emulating the instruction which caused the #VC exception
-                * failed - can't continue so print debug information
-                */
-               BUG();
-       }
-
-out:
-       instrumentation_end();
-       irqentry_nmi_exit(regs, irq_state);
-
-       return;
-
-fail:
-       if (user_mode(regs)) {
-               /*
-                * Do not kill the machine if user-space triggered the
-                * exception. Send SIGBUS instead and let user-space deal with
-                * it.
-                */
-               force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
-       } else {
-               pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
-                        result);
-
-               /* Show some debug info */
-               show_regs(regs);
-
-               /* Ask hypervisor to sev_es_terminate */
-               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-               /* If that fails and we get here - just panic */
-               panic("Returned from Terminate-Request to Hypervisor\n");
-       }
-
-       goto out;
-}
-
-/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
-DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
-{
-       instrumentation_begin();
-       panic("Can't handle #VC exception from unsupported context\n");
-       instrumentation_end();
-}
-
-DEFINE_IDTENTRY_VC(exc_vmm_communication)
-{
-       if (likely(!on_vc_fallback_stack(regs)))
-               safe_stack_exc_vmm_communication(regs, error_code);
-       else
-               ist_exc_vmm_communication(regs, error_code);
-}
-
-bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
-{
-       unsigned long exit_code = regs->orig_ax;
-       struct es_em_ctxt ctxt;
-       enum es_result result;
-
-       /* Do initial setup or terminate the guest */
-       if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
-               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
-
-       vc_ghcb_invalidate(boot_ghcb);
-
-       result = vc_init_em_ctxt(&ctxt, regs, exit_code);
-       if (result == ES_OK)
-               result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
-
-       /* Done - now check the result */
-       switch (result) {
-       case ES_OK:
-               vc_finish_insn(&ctxt);
-               break;
-       case ES_UNSUPPORTED:
-               early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
-                               exit_code, regs->ip);
-               goto fail;
-       case ES_VMM_ERROR:
-               early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
-                               exit_code, regs->ip);
-               goto fail;
-       case ES_DECODE_FAILED:
-               early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
-                               exit_code, regs->ip);
-               goto fail;
-       case ES_EXCEPTION:
-               vc_early_forward_exception(&ctxt);
-               break;
-       case ES_RETRY:
-               /* Nothing to do */
-               break;
-       default:
-               BUG();
-       }
-
-       return true;
-
-fail:
-       show_regs(regs);
-
-       while (true)
-               halt();
-}
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c

new file mode 100644 (file)

index 0000000..6ec8b3b
--- /dev/null
+++ b/arch/x86/kernel/sev-shared.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * AMD Encrypted Register State Support
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ *
+ * This file is not compiled stand-alone. It contains code shared
+ * between the pre-decompression boot code and the running Linux kernel
+ * and is included directly into both code-bases.
+ */
+
+#ifndef __BOOT_COMPRESSED
+#define error(v)       pr_err(v)
+#define has_cpuflag(f) boot_cpu_has(f)
+#endif
+
+static bool __init sev_es_check_cpu_features(void)
+{
+       if (!has_cpuflag(X86_FEATURE_RDRAND)) {
+               error("RDRAND instruction not supported - no trusted source of randomness available\n");
+               return false;
+       }
+
+       return true;
+}
+
+static void __noreturn sev_es_terminate(unsigned int reason)
+{
+       u64 val = GHCB_MSR_TERM_REQ;
+
+       /*
+        * Tell the hypervisor what went wrong - only reason-set 0 is
+        * currently supported.
+        */
+       val |= GHCB_SEV_TERM_REASON(0, reason);
+
+       /* Request Guest Termination from Hypervisor */
+       sev_es_wr_ghcb_msr(val);
+       VMGEXIT();
+
+       while (true)
+               asm volatile("hlt\n" : : : "memory");
+}
+
+static bool sev_es_negotiate_protocol(void)
+{
+       u64 val;
+
+       /* Do the GHCB protocol version negotiation */
+       sev_es_wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+
+       if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+               return false;
+
+       if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTO_OUR ||
+           GHCB_MSR_PROTO_MIN(val) > GHCB_PROTO_OUR)
+               return false;
+
+       return true;
+}
+
+static __always_inline void vc_ghcb_invalidate(struct ghcb *ghcb)
+{
+       memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
+}
+
+static bool vc_decoding_needed(unsigned long exit_code)
+{
+       /* Exceptions don't require decoding the instruction */
+       return !(exit_code >= SVM_EXIT_EXCP_BASE &&
+                exit_code <= SVM_EXIT_LAST_EXCP);
+}
+
+static enum es_result vc_init_em_ctxt(struct es_em_ctxt *ctxt,
+                                     struct pt_regs *regs,
+                                     unsigned long exit_code)
+{
+       enum es_result ret = ES_OK;
+
+       memset(ctxt, 0, sizeof(*ctxt));
+       ctxt->regs = regs;
+
+       if (vc_decoding_needed(exit_code))
+               ret = vc_decode_insn(ctxt);
+
+       return ret;
+}
+
+static void vc_finish_insn(struct es_em_ctxt *ctxt)
+{
+       ctxt->regs->ip += ctxt->insn.length;
+}
+
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+                                         struct es_em_ctxt *ctxt,
+                                         u64 exit_code, u64 exit_info_1,
+                                         u64 exit_info_2)
+{
+       enum es_result ret;
+
+       /* Fill in protocol and format specifiers */
+       ghcb->protocol_version = GHCB_PROTOCOL_MAX;
+       ghcb->ghcb_usage       = GHCB_DEFAULT_USAGE;
+
+       ghcb_set_sw_exit_code(ghcb, exit_code);
+       ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+       ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+       sev_es_wr_ghcb_msr(__pa(ghcb));
+       VMGEXIT();
+
+       if ((ghcb->save.sw_exit_info_1 & 0xffffffff) == 1) {
+               u64 info = ghcb->save.sw_exit_info_2;
+               unsigned long v;
+
+               info = ghcb->save.sw_exit_info_2;
+               v = info & SVM_EVTINJ_VEC_MASK;
+
+               /* Check if exception information from hypervisor is sane. */
+               if ((info & SVM_EVTINJ_VALID) &&
+                   ((v == X86_TRAP_GP) || (v == X86_TRAP_UD)) &&
+                   ((info & SVM_EVTINJ_TYPE_MASK) == SVM_EVTINJ_TYPE_EXEPT)) {
+                       ctxt->fi.vector = v;
+                       if (info & SVM_EVTINJ_VALID_ERR)
+                               ctxt->fi.error_code = info >> 32;
+                       ret = ES_EXCEPTION;
+               } else {
+                       ret = ES_VMM_ERROR;
+               }
+       } else {
+               ret = ES_OK;
+       }
+
+       return ret;
+}
+
+/*
+ * Boot VC Handler - This is the first VC handler during boot, there is no GHCB
+ * page yet, so it only supports the MSR based communication with the
+ * hypervisor and only the CPUID exit-code.
+ */
+void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
+{
+       unsigned int fn = lower_bits(regs->ax, 32);
+       unsigned long val;
+
+       /* Only CPUID is supported via MSR protocol */
+       if (exit_code != SVM_EXIT_CPUID)
+               goto fail;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EAX));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               goto fail;
+       regs->ax = val >> 32;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EBX));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               goto fail;
+       regs->bx = val >> 32;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_ECX));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               goto fail;
+       regs->cx = val >> 32;
+
+       sev_es_wr_ghcb_msr(GHCB_CPUID_REQ(fn, GHCB_CPUID_REQ_EDX));
+       VMGEXIT();
+       val = sev_es_rd_ghcb_msr();
+       if (GHCB_RESP_CODE(val) != GHCB_MSR_CPUID_RESP)
+               goto fail;
+       regs->dx = val >> 32;
+
+       /*
+        * This is a VC handler and the #VC is only raised when SEV-ES is
+        * active, which means SEV must be active too. Do sanity checks on the
+        * CPUID results to make sure the hypervisor does not trick the kernel
+        * into the no-sev path. This could map sensitive data unencrypted and
+        * make it accessible to the hypervisor.
+        *
+        * In particular, check for:
+        *      - Availability of CPUID leaf 0x8000001f
+        *      - SEV CPUID bit.
+        *
+        * The hypervisor might still report the wrong C-bit position, but this
+        * can't be checked here.
+        */
+
+       if (fn == 0x80000000 && (regs->ax < 0x8000001f))
+               /* SEV leaf check */
+               goto fail;
+       else if ((fn == 0x8000001f && !(regs->ax & BIT(1))))
+               /* SEV bit */
+               goto fail;
+
+       /* Skip over the CPUID two-byte opcode */
+       regs->ip += 2;
+
+       return;
+
+fail:
+       /* Terminate the guest */
+       sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+}
+
+static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
+                                         void *src, char *buf,
+                                         unsigned int data_size,
+                                         unsigned int count,
+                                         bool backwards)
+{
+       int i, b = backwards ? -1 : 1;
+       enum es_result ret = ES_OK;
+
+       for (i = 0; i < count; i++) {
+               void *s = src + (i * data_size * b);
+               char *d = buf + (i * data_size);
+
+               ret = vc_read_mem(ctxt, s, d, data_size);
+               if (ret != ES_OK)
+                       break;
+       }
+
+       return ret;
+}
+
+static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
+                                          void *dst, char *buf,
+                                          unsigned int data_size,
+                                          unsigned int count,
+                                          bool backwards)
+{
+       int i, s = backwards ? -1 : 1;
+       enum es_result ret = ES_OK;
+
+       for (i = 0; i < count; i++) {
+               void *d = dst + (i * data_size * s);
+               char *b = buf + (i * data_size);
+
+               ret = vc_write_mem(ctxt, d, b, data_size);
+               if (ret != ES_OK)
+                       break;
+       }
+
+       return ret;
+}
+
+#define IOIO_TYPE_STR  BIT(2)
+#define IOIO_TYPE_IN   1
+#define IOIO_TYPE_INS  (IOIO_TYPE_IN | IOIO_TYPE_STR)
+#define IOIO_TYPE_OUT  0
+#define IOIO_TYPE_OUTS (IOIO_TYPE_OUT | IOIO_TYPE_STR)
+
+#define IOIO_REP       BIT(3)
+
+#define IOIO_ADDR_64   BIT(9)
+#define IOIO_ADDR_32   BIT(8)
+#define IOIO_ADDR_16   BIT(7)
+
+#define IOIO_DATA_32   BIT(6)
+#define IOIO_DATA_16   BIT(5)
+#define IOIO_DATA_8    BIT(4)
+
+#define IOIO_SEG_ES    (0 << 10)
+#define IOIO_SEG_DS    (3 << 10)
+
+static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
+{
+       struct insn *insn = &ctxt->insn;
+       *exitinfo = 0;
+
+       switch (insn->opcode.bytes[0]) {
+       /* INS opcodes */
+       case 0x6c:
+       case 0x6d:
+               *exitinfo |= IOIO_TYPE_INS;
+               *exitinfo |= IOIO_SEG_ES;
+               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+               break;
+
+       /* OUTS opcodes */
+       case 0x6e:
+       case 0x6f:
+               *exitinfo |= IOIO_TYPE_OUTS;
+               *exitinfo |= IOIO_SEG_DS;
+               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+               break;
+
+       /* IN immediate opcodes */
+       case 0xe4:
+       case 0xe5:
+               *exitinfo |= IOIO_TYPE_IN;
+               *exitinfo |= (u8)insn->immediate.value << 16;
+               break;
+
+       /* OUT immediate opcodes */
+       case 0xe6:
+       case 0xe7:
+               *exitinfo |= IOIO_TYPE_OUT;
+               *exitinfo |= (u8)insn->immediate.value << 16;
+               break;
+
+       /* IN register opcodes */
+       case 0xec:
+       case 0xed:
+               *exitinfo |= IOIO_TYPE_IN;
+               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+               break;
+
+       /* OUT register opcodes */
+       case 0xee:
+       case 0xef:
+               *exitinfo |= IOIO_TYPE_OUT;
+               *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+               break;
+
+       default:
+               return ES_DECODE_FAILED;
+       }
+
+       switch (insn->opcode.bytes[0]) {
+       case 0x6c:
+       case 0x6e:
+       case 0xe4:
+       case 0xe6:
+       case 0xec:
+       case 0xee:
+               /* Single byte opcodes */
+               *exitinfo |= IOIO_DATA_8;
+               break;
+       default:
+               /* Length determined by instruction parsing */
+               *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
+                                                    : IOIO_DATA_32;
+       }
+       switch (insn->addr_bytes) {
+       case 2:
+               *exitinfo |= IOIO_ADDR_16;
+               break;
+       case 4:
+               *exitinfo |= IOIO_ADDR_32;
+               break;
+       case 8:
+               *exitinfo |= IOIO_ADDR_64;
+               break;
+       }
+
+       if (insn_has_rep_prefix(insn))
+               *exitinfo |= IOIO_REP;
+
+       return ES_OK;
+}
+
+static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+       struct pt_regs *regs = ctxt->regs;
+       u64 exit_info_1, exit_info_2;
+       enum es_result ret;
+
+       ret = vc_ioio_exitinfo(ctxt, &exit_info_1);
+       if (ret != ES_OK)
+               return ret;
+
+       if (exit_info_1 & IOIO_TYPE_STR) {
+
+               /* (REP) INS/OUTS */
+
+               bool df = ((regs->flags & X86_EFLAGS_DF) == X86_EFLAGS_DF);
+               unsigned int io_bytes, exit_bytes;
+               unsigned int ghcb_count, op_count;
+               unsigned long es_base;
+               u64 sw_scratch;
+
+               /*
+                * For the string variants with rep prefix the amount of in/out
+                * operations per #VC exception is limited so that the kernel
+                * has a chance to take interrupts and re-schedule while the
+                * instruction is emulated.
+                */
+               io_bytes   = (exit_info_1 >> 4) & 0x7;
+               ghcb_count = sizeof(ghcb->shared_buffer) / io_bytes;
+
+               op_count    = (exit_info_1 & IOIO_REP) ? regs->cx : 1;
+               exit_info_2 = min(op_count, ghcb_count);
+               exit_bytes  = exit_info_2 * io_bytes;
+
+               es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+               /* Read bytes of OUTS into the shared buffer */
+               if (!(exit_info_1 & IOIO_TYPE_IN)) {
+                       ret = vc_insn_string_read(ctxt,
+                                              (void *)(es_base + regs->si),
+                                              ghcb->shared_buffer, io_bytes,
+                                              exit_info_2, df);
+                       if (ret)
+                               return ret;
+               }
+
+               /*
+                * Issue a VMGEXIT to the HV to consume the bytes from the
+                * shared buffer or to have it write them into the shared buffer
+                * depending on the instruction: OUTS or INS.
+                */
+               sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
+               ghcb_set_sw_scratch(ghcb, sw_scratch);
+               ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
+                                         exit_info_1, exit_info_2);
+               if (ret != ES_OK)
+                       return ret;
+
+               /* Read bytes from shared buffer into the guest's destination. */
+               if (exit_info_1 & IOIO_TYPE_IN) {
+                       ret = vc_insn_string_write(ctxt,
+                                                  (void *)(es_base + regs->di),
+                                                  ghcb->shared_buffer, io_bytes,
+                                                  exit_info_2, df);
+                       if (ret)
+                               return ret;
+
+                       if (df)
+                               regs->di -= exit_bytes;
+                       else
+                               regs->di += exit_bytes;
+               } else {
+                       if (df)
+                               regs->si -= exit_bytes;
+                       else
+                               regs->si += exit_bytes;
+               }
+
+               if (exit_info_1 & IOIO_REP)
+                       regs->cx -= exit_info_2;
+
+               ret = regs->cx ? ES_RETRY : ES_OK;
+
+       } else {
+
+               /* IN/OUT into/from rAX */
+
+               int bits = (exit_info_1 & 0x70) >> 1;
+               u64 rax = 0;
+
+               if (!(exit_info_1 & IOIO_TYPE_IN))
+                       rax = lower_bits(regs->ax, bits);
+
+               ghcb_set_rax(ghcb, rax);
+
+               ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
+               if (ret != ES_OK)
+                       return ret;
+
+               if (exit_info_1 & IOIO_TYPE_IN) {
+                       if (!ghcb_rax_is_valid(ghcb))
+                               return ES_VMM_ERROR;
+                       regs->ax = lower_bits(ghcb->save.rax, bits);
+               }
+       }
+
+       return ret;
+}
+
+static enum es_result vc_handle_cpuid(struct ghcb *ghcb,
+                                     struct es_em_ctxt *ctxt)
+{
+       struct pt_regs *regs = ctxt->regs;
+       u32 cr4 = native_read_cr4();
+       enum es_result ret;
+
+       ghcb_set_rax(ghcb, regs->ax);
+       ghcb_set_rcx(ghcb, regs->cx);
+
+       if (cr4 & X86_CR4_OSXSAVE)
+               /* Safe to read xcr0 */
+               ghcb_set_xcr0(ghcb, xgetbv(XCR_XFEATURE_ENABLED_MASK));
+       else
+               /* xgetbv will cause #GP - use reset value for xcr0 */
+               ghcb_set_xcr0(ghcb, 1);
+
+       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
+       if (ret != ES_OK)
+               return ret;
+
+       if (!(ghcb_rax_is_valid(ghcb) &&
+             ghcb_rbx_is_valid(ghcb) &&
+             ghcb_rcx_is_valid(ghcb) &&
+             ghcb_rdx_is_valid(ghcb)))
+               return ES_VMM_ERROR;
+
+       regs->ax = ghcb->save.rax;
+       regs->bx = ghcb->save.rbx;
+       regs->cx = ghcb->save.rcx;
+       regs->dx = ghcb->save.rdx;
+
+       return ES_OK;
+}
+
+static enum es_result vc_handle_rdtsc(struct ghcb *ghcb,
+                                     struct es_em_ctxt *ctxt,
+                                     unsigned long exit_code)
+{
+       bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
+       enum es_result ret;
+
+       ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
+       if (ret != ES_OK)
+               return ret;
+
+       if (!(ghcb_rax_is_valid(ghcb) && ghcb_rdx_is_valid(ghcb) &&
+            (!rdtscp || ghcb_rcx_is_valid(ghcb))))
+               return ES_VMM_ERROR;
+
+       ctxt->regs->ax = ghcb->save.rax;
+       ctxt->regs->dx = ghcb->save.rdx;
+       if (rdtscp)
+               ctxt->regs->cx = ghcb->save.rcx;
+
+       return ES_OK;
+}
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c

new file mode 100644 (file)

index 0000000..9578c82
--- /dev/null
+++ b/arch/x86/kernel/sev.c
@@ -0,0 +1,1461 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Memory Encryption Support
+ *
+ * Copyright (C) 2019 SUSE
+ *
+ * Author: Joerg Roedel <jroedel@suse.de>
+ */
+
+#define pr_fmt(fmt)    "SEV-ES: " fmt
+
+#include <linux/sched/debug.h> /* For show_regs() */
+#include <linux/percpu-defs.h>
+#include <linux/mem_encrypt.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
+#include <linux/mm_types.h>
+#include <linux/set_memory.h>
+#include <linux/memblock.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/stacktrace.h>
+#include <asm/sev.h>
+#include <asm/insn-eval.h>
+#include <asm/fpu/internal.h>
+#include <asm/processor.h>
+#include <asm/realmode.h>
+#include <asm/traps.h>
+#include <asm/svm.h>
+#include <asm/smp.h>
+#include <asm/cpu.h>
+
+#define DR7_RESET_VALUE        0x400
+
+/* For early boot hypervisor communication in SEV-ES enabled guests */
+static struct ghcb boot_ghcb_page __bss_decrypted __aligned(PAGE_SIZE);
+
+/*
+ * Needs to be in the .data section because we need it NULL before bss is
+ * cleared
+ */
+static struct ghcb __initdata *boot_ghcb;
+
+/* #VC handler runtime per-CPU data */
+struct sev_es_runtime_data {
+       struct ghcb ghcb_page;
+
+       /* Physical storage for the per-CPU IST stack of the #VC handler */
+       char ist_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+       /*
+        * Physical storage for the per-CPU fall-back stack of the #VC handler.
+        * The fall-back stack is used when it is not safe to switch back to the
+        * interrupted stack in the #VC entry code.
+        */
+       char fallback_stack[EXCEPTION_STKSZ] __aligned(PAGE_SIZE);
+
+       /*
+        * Reserve one page per CPU as backup storage for the unencrypted GHCB.
+        * It is needed when an NMI happens while the #VC handler uses the real
+        * GHCB, and the NMI handler itself is causing another #VC exception. In
+        * that case the GHCB content of the first handler needs to be backed up
+        * and restored.
+        */
+       struct ghcb backup_ghcb;
+
+       /*
+        * Mark the per-cpu GHCBs as in-use to detect nested #VC exceptions.
+        * There is no need for it to be atomic, because nothing is written to
+        * the GHCB between the read and the write of ghcb_active. So it is safe
+        * to use it when a nested #VC exception happens before the write.
+        *
+        * This is necessary for example in the #VC->NMI->#VC case when the NMI
+        * happens while the first #VC handler uses the GHCB. When the NMI code
+        * raises a second #VC handler it might overwrite the contents of the
+        * GHCB written by the first handler. To avoid this the content of the
+        * GHCB is saved and restored when the GHCB is detected to be in use
+        * already.
+        */
+       bool ghcb_active;
+       bool backup_ghcb_active;
+
+       /*
+        * Cached DR7 value - write it on DR7 writes and return it on reads.
+        * That value will never make it to the real hardware DR7 as debugging
+        * is currently unsupported in SEV-ES guests.
+        */
+       unsigned long dr7;
+};
+
+struct ghcb_state {
+       struct ghcb *ghcb;
+};
+
+static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data);
+DEFINE_STATIC_KEY_FALSE(sev_es_enable_key);
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static void __init setup_vc_stacks(int cpu)
+{
+       struct sev_es_runtime_data *data;
+       struct cpu_entry_area *cea;
+       unsigned long vaddr;
+       phys_addr_t pa;
+
+       data = per_cpu(runtime_data, cpu);
+       cea  = get_cpu_entry_area(cpu);
+
+       /* Map #VC IST stack */
+       vaddr = CEA_ESTACK_BOT(&cea->estacks, VC);
+       pa    = __pa(data->ist_stack);
+       cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+
+       /* Map VC fall-back stack */
+       vaddr = CEA_ESTACK_BOT(&cea->estacks, VC2);
+       pa    = __pa(data->fallback_stack);
+       cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
+}
+
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
+{
+       unsigned long sp = regs->sp;
+
+       /* User-mode RSP is not trusted */
+       if (user_mode(regs))
+               return false;
+
+       /* SYSCALL gap still has user-mode RSP */
+       if (ip_within_syscall_gap(regs))
+               return false;
+
+       return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
+}
+
+/*
+ * This function handles the case when an NMI is raised in the #VC
+ * exception handler entry code, before the #VC handler has switched off
+ * its IST stack. In this case, the IST entry for #VC must be adjusted,
+ * so that any nested #VC exception will not overwrite the stack
+ * contents of the interrupted #VC handler.
+ *
+ * The IST entry is adjusted unconditionally so that it can also be
+ * unconditionally adjusted back in __sev_es_ist_exit(). Otherwise a
+ * nested sev_es_ist_exit() call may adjust back the IST entry too
+ * early.
+ *
+ * The __sev_es_ist_enter() and __sev_es_ist_exit() functions always run
+ * on the NMI IST stack, as they are only called from NMI handling code
+ * right now.
+ */
+void noinstr __sev_es_ist_enter(struct pt_regs *regs)
+{
+       unsigned long old_ist, new_ist;
+
+       /* Read old IST entry */
+       new_ist = old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+       /*
+        * If NMI happened while on the #VC IST stack, set the new IST
+        * value below regs->sp, so that the interrupted stack frame is
+        * not overwritten by subsequent #VC exceptions.
+        */
+       if (on_vc_stack(regs))
+               new_ist = regs->sp;
+
+       /*
+        * Reserve additional 8 bytes and store old IST value so this
+        * adjustment can be unrolled in __sev_es_ist_exit().
+        */
+       new_ist -= sizeof(old_ist);
+       *(unsigned long *)new_ist = old_ist;
+
+       /* Set new IST entry */
+       this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], new_ist);
+}
+
+void noinstr __sev_es_ist_exit(void)
+{
+       unsigned long ist;
+
+       /* Read IST entry */
+       ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
+
+       if (WARN_ON(ist == __this_cpu_ist_top_va(VC)))
+               return;
+
+       /* Read back old IST entry and write it to the TSS */
+       this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
+}
+
+static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+{
+       struct sev_es_runtime_data *data;
+       struct ghcb *ghcb;
+
+       data = this_cpu_read(runtime_data);
+       ghcb = &data->ghcb_page;
+
+       if (unlikely(data->ghcb_active)) {
+               /* GHCB is already in use - save its contents */
+
+               if (unlikely(data->backup_ghcb_active))
+                       return NULL;
+
+               /* Mark backup_ghcb active before writing to it */
+               data->backup_ghcb_active = true;
+
+               state->ghcb = &data->backup_ghcb;
+
+               /* Backup GHCB content */
+               *state->ghcb = *ghcb;
+       } else {
+               state->ghcb = NULL;
+               data->ghcb_active = true;
+       }
+
+       return ghcb;
+}
+
+static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+{
+       struct sev_es_runtime_data *data;
+       struct ghcb *ghcb;
+
+       data = this_cpu_read(runtime_data);
+       ghcb = &data->ghcb_page;
+
+       if (state->ghcb) {
+               /* Restore GHCB from Backup */
+               *ghcb = *state->ghcb;
+               data->backup_ghcb_active = false;
+               state->ghcb = NULL;
+       } else {
+               data->ghcb_active = false;
+       }
+}
+
+/* Needed in vc_early_forward_exception */
+void do_early_exception(struct pt_regs *regs, int trapnr);
+
+static inline u64 sev_es_rd_ghcb_msr(void)
+{
+       return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static __always_inline void sev_es_wr_ghcb_msr(u64 val)
+{
+       u32 low, high;
+
+       low  = (u32)(val);
+       high = (u32)(val >> 32);
+
+       native_wrmsr(MSR_AMD64_SEV_ES_GHCB, low, high);
+}
+
+static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
+                               unsigned char *buffer)
+{
+       return copy_from_kernel_nofault(buffer, (unsigned char *)ctxt->regs->ip, MAX_INSN_SIZE);
+}
+
+/*
+ * Fetch and decode the user-space instruction at @ctxt->regs->ip.
+ *
+ * Return: ES_OK when decoding succeeded and the decoder reports the
+ * immediate as fetched, ES_EXCEPTION (with a user instruction-fetch #PF
+ * recorded in @ctxt->fi) when no bytes could be fetched, ES_DECODE_FAILED
+ * otherwise.
+ */
+static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
+{
+       char insn_bytes[MAX_INSN_SIZE];
+       int nbytes;
+
+       nbytes = insn_fetch_from_user_inatomic(ctxt->regs, insn_bytes);
+       if (nbytes == 0) {
+               /* Nothing could be read - report a #PF on the instruction fetch */
+               ctxt->fi.vector     = X86_TRAP_PF;
+               ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
+               ctxt->fi.cr2        = ctxt->regs->ip;
+               return ES_EXCEPTION;
+       }
+
+       if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, insn_bytes, nbytes))
+               return ES_DECODE_FAILED;
+
+       return ctxt->insn.immediate.got ? ES_OK : ES_DECODE_FAILED;
+}
+
+/*
+ * Fetch and decode the kernel-mode instruction at @ctxt->regs->ip.
+ *
+ * Return: ES_OK on success, ES_EXCEPTION (with an instruction-fetch #PF
+ * recorded in @ctxt->fi) when the bytes cannot be read, ES_DECODE_FAILED
+ * when insn_decode() reports an error.
+ */
+static enum es_result __vc_decode_kern_insn(struct es_em_ctxt *ctxt)
+{
+       char insn_bytes[MAX_INSN_SIZE];
+
+       if (vc_fetch_insn_kernel(ctxt, insn_bytes) != 0) {
+               ctxt->fi.vector     = X86_TRAP_PF;
+               ctxt->fi.error_code = X86_PF_INSTR;
+               ctxt->fi.cr2        = ctxt->regs->ip;
+               return ES_EXCEPTION;
+       }
+
+       if (insn_decode(&ctxt->insn, insn_bytes, MAX_INSN_SIZE, INSN_MODE_64) < 0)
+               return ES_DECODE_FAILED;
+
+       return ES_OK;
+}
+
+/*
+ * Decode the instruction that raised the #VC exception, picking the user or
+ * kernel fetch path based on the mode recorded in @ctxt->regs.
+ */
+static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
+{
+       if (!user_mode(ctxt->regs))
+               return __vc_decode_kern_insn(ctxt);
+
+       return __vc_decode_user_insn(ctxt);
+}
+
+/*
+ * Store @size bytes from @buf to @dst on behalf of an emulated instruction.
+ *
+ * Kernel-mode accesses to addresses that access_ok() rejects are done with a
+ * plain memcpy(). Everything else goes through put_user(), which only
+ * supports sizes of 1, 2, 4 and 8 bytes here.
+ *
+ * Return: ES_OK on success, ES_UNSUPPORTED for any other @size, or
+ * ES_EXCEPTION with a write-protection #PF for @dst recorded in @ctxt->fi
+ * when put_user() fails.
+ */
+static enum es_result vc_write_mem(struct es_em_ctxt *ctxt,
+                                  char *dst, char *buf, size_t size)
+{
+       unsigned long error_code = X86_PF_PROT | X86_PF_WRITE;
+       char __user *target = (char __user *)dst;
+       u64 d8;
+       u32 d4;
+       u16 d2;
+       u8  d1;
+
+       /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+       if (!user_mode(ctxt->regs) && !access_ok(target, size)) {
+               memcpy(dst, buf, size);
+               return ES_OK;
+       }
+
+       /* Stage the data in a correctly sized integer for put_user() */
+       switch (size) {
+       case 1:
+               memcpy(&d1, buf, 1);
+               if (put_user(d1, target))
+                       goto fault;
+               break;
+       case 2:
+               memcpy(&d2, buf, 2);
+               if (put_user(d2, target))
+                       goto fault;
+               break;
+       case 4:
+               memcpy(&d4, buf, 4);
+               if (put_user(d4, target))
+                       goto fault;
+               break;
+       case 8:
+               memcpy(&d8, buf, 8);
+               if (put_user(d8, target))
+                       goto fault;
+               break;
+       default:
+               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+               return ES_UNSUPPORTED;
+       }
+
+       return ES_OK;
+
+fault:
+       /* Describe the failed write as a page fault to be forwarded */
+       if (user_mode(ctxt->regs))
+               error_code |= X86_PF_USER;
+
+       ctxt->fi.vector = X86_TRAP_PF;
+       ctxt->fi.error_code = error_code;
+       ctxt->fi.cr2 = (unsigned long)dst;
+
+       return ES_EXCEPTION;
+}
+
+/*
+ * Load @size bytes from @src into @buf on behalf of an emulated instruction.
+ *
+ * Kernel-mode accesses to addresses that access_ok() rejects are done with a
+ * plain memcpy(). Everything else goes through get_user(), which only
+ * supports sizes of 1, 2, 4 and 8 bytes here.
+ *
+ * Return: ES_OK on success, ES_UNSUPPORTED for any other @size, or
+ * ES_EXCEPTION with a protection #PF for @src recorded in @ctxt->fi when
+ * get_user() fails.
+ */
+static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
+                                 char *src, char *buf, size_t size)
+{
+       unsigned long error_code = X86_PF_PROT;
+       char __user *s = (char __user *)src;
+       u64 d8;
+       u32 d4;
+       u16 d2;
+       u8  d1;
+
+       /* If instruction ran in kernel mode and the I/O buffer is in kernel space */
+       if (!user_mode(ctxt->regs) && !access_ok(s, size)) {
+               memcpy(buf, src, size);
+               return ES_OK;
+       }
+
+       /* Read into a correctly sized integer, then copy out to @buf */
+       switch (size) {
+       case 1:
+               if (get_user(d1, s))
+                       goto fault;
+               memcpy(buf, &d1, 1);
+               break;
+       case 2:
+               if (get_user(d2, s))
+                       goto fault;
+               memcpy(buf, &d2, 2);
+               break;
+       case 4:
+               if (get_user(d4, s))
+                       goto fault;
+               memcpy(buf, &d4, 4);
+               break;
+       case 8:
+               if (get_user(d8, s))
+                       goto fault;
+               memcpy(buf, &d8, 8);
+               break;
+       default:
+               WARN_ONCE(1, "%s: Invalid size: %zu\n", __func__, size);
+               return ES_UNSUPPORTED;
+       }
+
+       return ES_OK;
+
+fault:
+       /* Describe the failed read as a page fault to be forwarded */
+       if (user_mode(ctxt->regs))
+               error_code |= X86_PF_USER;
+
+       ctxt->fi.vector = X86_TRAP_PF;
+       ctxt->fi.error_code = error_code;
+       ctxt->fi.cr2 = (unsigned long)src;
+
+       return ES_EXCEPTION;
+}
+
+/*
+ * Translate @vaddr to a physical address by walking the page-table that CR3
+ * currently points to and store the result in @paddr. @ghcb is not used.
+ *
+ * Return: ES_OK on success, ES_EXCEPTION with a #PF for @vaddr recorded in
+ * @ctxt->fi when no PTE is found, or ES_UNSUPPORTED when the mapping has
+ * _PAGE_ENC set.
+ */
+static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+                                          unsigned long vaddr, phys_addr_t *paddr)
+{
+       unsigned long va = (unsigned long)vaddr;
+       unsigned int level;
+       phys_addr_t pa;
+       pgd_t *pgd;
+       pte_t *pte;
+
+       /* Start the walk at the PGD entry covering @va */
+       pgd = __va(read_cr3_pa());
+       pgd = &pgd[pgd_index(va)];
+       pte = lookup_address_in_pgd(pgd, va, &level);
+       if (!pte) {
+               ctxt->fi.vector     = X86_TRAP_PF;
+               ctxt->fi.cr2        = vaddr;
+               ctxt->fi.error_code = 0;
+
+               if (user_mode(ctxt->regs))
+                       ctxt->fi.error_code |= X86_PF_USER;
+
+               return ES_EXCEPTION;
+       }
+
+       if (WARN_ON_ONCE(pte_val(*pte) & _PAGE_ENC))
+               /* Emulated MMIO to/from encrypted memory not supported */
+               return ES_UNSUPPORTED;
+
+       /* Frame address plus the offset of @va within a mapping of this level */
+       pa = (phys_addr_t)pte_pfn(*pte) << PAGE_SHIFT;
+       pa |= va & ~page_level_mask(level);
+
+       *paddr = pa;
+
+       return ES_OK;
+}
+
+/* Include code shared with pre-decompression boot stage */
+#include "sev-shared.c"
+
+/*
+ * Issue an SVM_VMGEXIT_NMI_COMPLETE request to the hypervisor through the
+ * per-CPU GHCB.
+ *
+ * NOTE(review): sev_es_get_ghcb() returns NULL when the backup GHCB is
+ * already active; that case is not checked here - confirm it cannot happen
+ * on this path.
+ */
+void noinstr __sev_es_nmi_complete(void)
+{
+       struct ghcb_state state;
+       struct ghcb *ghcb;
+
+       ghcb = sev_es_get_ghcb(&state);
+
+       vc_ghcb_invalidate(ghcb);
+       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
+       ghcb_set_sw_exit_info_1(ghcb, 0);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+
+       /* Point the GHCB MSR at our GHCB before exiting to the hypervisor */
+       sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
+       VMGEXIT();
+
+       sev_es_put_ghcb(&state);
+}
+
+/*
+ * Ask the hypervisor for the AP jump table address via an
+ * SVM_VMGEXIT_AP_JUMP_TABLE / SVM_VMGEXIT_GET_AP_JUMP_TABLE request.
+ *
+ * Runs with IRQs disabled while the GHCB is held.
+ *
+ * Return: the value of sw_exit_info_2 when the hypervisor marked both
+ * sw_exit_info fields valid, 0 otherwise.
+ */
+static u64 get_jump_table_addr(void)
+{
+       struct ghcb_state state;
+       unsigned long flags;
+       struct ghcb *ghcb;
+       u64 ret = 0;
+
+       local_irq_save(flags);
+
+       ghcb = sev_es_get_ghcb(&state);
+
+       vc_ghcb_invalidate(ghcb);
+       ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
+       ghcb_set_sw_exit_info_1(ghcb, SVM_VMGEXIT_GET_AP_JUMP_TABLE);
+       ghcb_set_sw_exit_info_2(ghcb, 0);
+
+       sev_es_wr_ghcb_msr(__pa(ghcb));
+       VMGEXIT();
+
+       /* Only trust the answer if the hypervisor marked both fields valid */
+       if (ghcb_sw_exit_info_1_is_valid(ghcb) &&
+           ghcb_sw_exit_info_2_is_valid(ghcb))
+               ret = ghcb->save.sw_exit_info_2;
+
+       sev_es_put_ghcb(&state);
+
+       local_irq_restore(flags);
+
+       return ret;
+}
+
+/*
+ * Write the real-mode start address for APs into the AP jump table reported
+ * by the hypervisor.
+ *
+ * The start address is derived from @rmh: CS is trampoline_start >> 4 and IP
+ * is the offset of sev_es_trampoline_start from trampoline_start. IP is
+ * written to the first 16-bit entry of the table and CS to the second.
+ *
+ * Return: 0 on success or when no jump table address was reported, -EINVAL
+ * when the reported address is not page-aligned, -EIO when the table cannot
+ * be mapped.
+ */
+int sev_es_setup_ap_jump_table(struct real_mode_header *rmh)
+{
+       u16 startup_cs, startup_ip;
+       phys_addr_t jump_table_pa;
+       u64 jump_table_addr;
+       u16 __iomem *jump_table;
+
+       jump_table_addr = get_jump_table_addr();
+
+       /* On UP guests there is no jump table so this is not a failure */
+       if (!jump_table_addr)
+               return 0;
+
+       /* Check if AP Jump Table is page-aligned */
+       if (jump_table_addr & ~PAGE_MASK)
+               return -EINVAL;
+
+       jump_table_pa = jump_table_addr & PAGE_MASK;
+
+       startup_cs = (u16)(rmh->trampoline_start >> 4);
+       startup_ip = (u16)(rmh->sev_es_trampoline_start -
+                          rmh->trampoline_start);
+
+       jump_table = ioremap_encrypted(jump_table_pa, PAGE_SIZE);
+       if (!jump_table)
+               return -EIO;
+
+       writew(startup_ip, &jump_table[0]);
+       writew(startup_cs, &jump_table[1]);
+
+       iounmap(jump_table);
+
+       return 0;
+}
+
+/*
+ * This is needed by the OVMF UEFI firmware which will use whatever it finds in
+ * the GHCB MSR as its GHCB to talk to the hypervisor. So make sure the per-cpu
+ * runtime GHCBs used by the kernel are also mapped in the EFI page-table.
+ *
+ * Each GHCB page is mapped into @pgd at its physical address with
+ * _PAGE_NX | _PAGE_RW.
+ *
+ * Return: 0 on success or when SEV-ES is not active, 1 when mapping a page
+ * fails.
+ */
+int __init sev_es_efi_map_ghcbs(pgd_t *pgd)
+{
+       struct sev_es_runtime_data *data;
+       unsigned long address, pflags;
+       int cpu;
+       u64 pfn;
+
+       if (!sev_es_active())
+               return 0;
+
+       pflags = _PAGE_NX | _PAGE_RW;
+
+       for_each_possible_cpu(cpu) {
+               data = per_cpu(runtime_data, cpu);
+
+               /* The physical address is also used as the mapping address */
+               address = __pa(&data->ghcb_page);
+               pfn = address >> PAGE_SHIFT;
+
+               if (kernel_map_pages_in_pgd(pgd, pfn, address, 1, pflags))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Emulate an intercepted RDMSR/WRMSR by forwarding it to the hypervisor.
+ *
+ * RCX is always passed; RAX/RDX are passed for WRMSR and copied back from
+ * the GHCB into @ctxt->regs after a successful RDMSR.
+ *
+ * Return: the result of sev_es_ghcb_hv_call().
+ */
+static enum es_result vc_handle_msr(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+       /* Is it a WRMSR? */
+       bool is_wrmsr = ctxt->insn.opcode.bytes[1] == 0x30;
+       struct pt_regs *regs = ctxt->regs;
+       enum es_result ret;
+
+       ghcb_set_rcx(ghcb, regs->cx);
+
+       if (is_wrmsr) {
+               ghcb_set_rax(ghcb, regs->ax);
+               ghcb_set_rdx(ghcb, regs->dx);
+       }
+
+       /* ExitInfo1 is 1 for WRMSR and 0 for RDMSR */
+       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, is_wrmsr ? 1 : 0, 0);
+       if (ret != ES_OK || is_wrmsr)
+               return ret;
+
+       /* Successful RDMSR - hand the result back to the guest registers */
+       regs->ax = ghcb->save.rax;
+       regs->dx = ghcb->save.rdx;
+
+       return ret;
+}
+
+/*
+ * This function runs on the first #VC exception after the kernel
+ * switched to virtual addresses.
+ *
+ * Return: true when the protocol was negotiated and boot_ghcb has been set
+ * up, false when protocol negotiation failed.
+ */
+static bool __init sev_es_setup_ghcb(void)
+{
+       /* First make sure the hypervisor talks a supported protocol. */
+       if (!sev_es_negotiate_protocol())
+               return false;
+
+       /*
+        * Clear the boot_ghcb. The first exception comes in before the bss
+        * section is cleared.
+        */
+       memset(&boot_ghcb_page, 0, PAGE_SIZE);
+
+       /* Alright - Make the boot-ghcb public */
+       boot_ghcb = &boot_ghcb_page;
+
+       return true;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Park the current CPU in the hypervisor by repeatedly issuing
+ * SVM_VMGEXIT_AP_HLT_LOOP requests. Returns once the hypervisor reports a
+ * valid, non-zero sw_exit_info_2.
+ */
+static void sev_es_ap_hlt_loop(void)
+{
+       struct ghcb_state state;
+       struct ghcb *ghcb;
+
+       ghcb = sev_es_get_ghcb(&state);
+
+       while (true) {
+               /* Build a fresh request on every iteration */
+               vc_ghcb_invalidate(ghcb);
+               ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_HLT_LOOP);
+               ghcb_set_sw_exit_info_1(ghcb, 0);
+               ghcb_set_sw_exit_info_2(ghcb, 0);
+
+               sev_es_wr_ghcb_msr(__pa(ghcb));
+               VMGEXIT();
+
+               /* Wakeup signal? */
+               if (ghcb_sw_exit_info_2_is_valid(ghcb) &&
+                   ghcb->save.sw_exit_info_2)
+                       break;
+       }
+
+       sev_es_put_ghcb(&state);
+}
+
+/*
+ * Play_dead handler when running under SEV-ES. This is needed because
+ * the hypervisor can't deliver an SIPI request to restart the AP.
+ * Instead the kernel has to issue a VMGEXIT to halt the VCPU until the
+ * hypervisor wakes it up again.
+ *
+ * Sequence: play_dead_common(), then sev_es_ap_hlt_loop() until woken, then
+ * start_cpu0() to bring the CPU back up.
+ */
+static void sev_es_play_dead(void)
+{
+       play_dead_common();
+
+       /* IRQs now disabled */
+
+       sev_es_ap_hlt_loop();
+
+       /*
+        * If we get here, the VCPU was woken up again. Jump to CPU
+        * startup code to get it back online.
+        */
+       start_cpu0();
+}
+#else  /* CONFIG_HOTPLUG_CPU */
+#define sev_es_play_dead       native_play_dead
+#endif /* CONFIG_HOTPLUG_CPU */
+
+#ifdef CONFIG_SMP
+/* Install sev_es_play_dead() as the play_dead callback in smp_ops. */
+static void __init sev_es_setup_play_dead(void)
+{
+       smp_ops.play_dead = sev_es_play_dead;
+}
+#else
+/* Without CONFIG_SMP there is nothing to install. */
+static inline void sev_es_setup_play_dead(void) { }
+#endif
+
+/*
+ * Allocate the page-aligned SEV-ES runtime data for @cpu from memblock and
+ * publish it in the per-CPU runtime_data pointer. Panics when the allocation
+ * fails.
+ */
+static void __init alloc_runtime_data(int cpu)
+{
+       struct sev_es_runtime_data *data;
+
+       data = memblock_alloc(sizeof(*data), PAGE_SIZE);
+       if (!data)
+               panic("Can't allocate SEV-ES runtime data");
+
+       per_cpu(runtime_data, cpu) = data;
+}
+
+/*
+ * Prepare the GHCB page of @cpu: map it decrypted, zero it and mark both the
+ * GHCB and the backup GHCB as not in use. Panics when the page cannot be
+ * mapped decrypted.
+ */
+static void __init init_ghcb(int cpu)
+{
+       struct sev_es_runtime_data *data;
+       int err;
+
+       data = per_cpu(runtime_data, cpu);
+
+       err = early_set_memory_decrypted((unsigned long)&data->ghcb_page,
+                                        sizeof(data->ghcb_page));
+       if (err)
+               panic("Can't map GHCBs unencrypted");
+
+       /* Zero the page only after its mapping has been changed */
+       memset(&data->ghcb_page, 0, sizeof(data->ghcb_page));
+
+       data->ghcb_active = false;
+       data->backup_ghcb_active = false;
+}
+
+/*
+ * Set up runtime #VC handling: no-op unless SEV-ES is active. Checks the
+ * required CPU features (panics if missing), enables the SEV-ES static key,
+ * sets up runtime data, GHCB and #VC stacks for every possible CPU, installs
+ * the play_dead handler and switches initial_vc_handler to the runtime
+ * handler.
+ */
+void __init sev_es_init_vc_handling(void)
+{
+       int cpu;
+
+       /* The GHCB page must sit at a page-aligned offset in the runtime data */
+       BUILD_BUG_ON(offsetof(struct sev_es_runtime_data, ghcb_page) % PAGE_SIZE);
+
+       if (!sev_es_active())
+               return;
+
+       if (!sev_es_check_cpu_features())
+               panic("SEV-ES CPU Features missing");
+
+       /* Enable SEV-ES special handling */
+       static_branch_enable(&sev_es_enable_key);
+
+       /* Initialize per-cpu GHCB pages */
+       for_each_possible_cpu(cpu) {
+               alloc_runtime_data(cpu);
+               init_ghcb(cpu);
+               setup_vc_stacks(cpu);
+       }
+
+       sev_es_setup_play_dead();
+
+       /* Secondary CPUs use the runtime #VC handler */
+       initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+}
+
+/*
+ * Forward the exception recorded in @ctxt->fi to do_early_exception(). For a
+ * page fault CR2 is loaded with the faulting address first; the error code
+ * is passed in regs->orig_ax.
+ */
+static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
+{
+       int trapnr = ctxt->fi.vector;
+
+       if (trapnr == X86_TRAP_PF)
+               native_write_cr2(ctxt->fi.cr2);
+
+       ctxt->regs->orig_ax = ctxt->fi.error_code;
+       do_early_exception(ctxt->regs, trapnr);
+}
+
+/*
+ * Return a pointer to the slot in @ctxt->regs selected by the ModRM.reg
+ * field of the decoded instruction, or NULL when the decoder reports no
+ * valid register offset.
+ */
+static long *vc_insn_get_reg(struct es_em_ctxt *ctxt)
+{
+       int byte_off = insn_get_modrm_reg_off(&ctxt->insn, ctxt->regs);
+
+       if (byte_off < 0)
+               return NULL;
+
+       /* Convert the byte offset into an index of long-sized slots */
+       return (long *)ctxt->regs + byte_off / sizeof(long);
+}
+
+/*
+ * Return a pointer to the slot in @ctxt->regs selected by the ModRM.rm
+ * field of the decoded instruction, or NULL when the decoder reports no
+ * valid register offset.
+ */
+static long *vc_insn_get_rm(struct es_em_ctxt *ctxt)
+{
+       int byte_off = insn_get_modrm_rm_off(&ctxt->insn, ctxt->regs);
+
+       if (byte_off < 0)
+               return NULL;
+
+       /* Convert the byte offset into an index of long-sized slots */
+       return (long *)ctxt->regs + byte_off / sizeof(long);
+}
+
+/*
+ * Perform one MMIO access of @bytes bytes through the hypervisor. The target
+ * is the memory operand of the decoded instruction; data is exchanged via
+ * the GHCB shared buffer, whose physical address is passed as sw_scratch.
+ *
+ * Return: ES_UNSUPPORTED when the operand address cannot be determined, the
+ * error from vc_slow_virt_to_phys() when translation fails (with
+ * X86_PF_WRITE added to a forwarded #PF for writes), otherwise the result of
+ * sev_es_ghcb_hv_call().
+ */
+static enum es_result vc_do_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt,
+                                unsigned int bytes, bool read)
+{
+       u64 exit_code, exit_info_1, exit_info_2;
+       unsigned long ghcb_pa = __pa(ghcb);
+       enum es_result res;
+       phys_addr_t paddr;
+       void __user *ref;
+
+       ref = insn_get_addr_ref(&ctxt->insn, ctxt->regs);
+       if (ref == (void __user *)-1L)
+               return ES_UNSUPPORTED;
+
+       exit_code = read ? SVM_VMGEXIT_MMIO_READ : SVM_VMGEXIT_MMIO_WRITE;
+
+       res = vc_slow_virt_to_phys(ghcb, ctxt, (unsigned long)ref, &paddr);
+       if (res != ES_OK) {
+               if (res == ES_EXCEPTION && !read)
+                       ctxt->fi.error_code |= X86_PF_WRITE;
+
+               return res;
+       }
+
+       exit_info_1 = paddr;
+       /* Can never be greater than 8 */
+       exit_info_2 = bytes;
+
+       ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
+
+       return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
+}
+
+/*
+ * Emulate MMIO reads for two-byte opcodes, selected by the second opcode
+ * byte: 0xb6/0xb7 read 1/2 bytes with zero-extension, 0xbe/0xbf read 1/2
+ * bytes with sign-extension. The destination register is filled over
+ * insn->opnd_bytes and then the low bytes are overwritten with the data read.
+ *
+ * Return: ES_OK on success, ES_DECODE_FAILED when the destination register
+ * cannot be determined, ES_UNSUPPORTED for other opcodes, or the error
+ * returned by vc_do_mmio().
+ */
+static enum es_result vc_handle_mmio_twobyte_ops(struct ghcb *ghcb,
+                                                struct es_em_ctxt *ctxt)
+{
+       struct insn *insn = &ctxt->insn;
+       unsigned int bytes = 0;
+       enum es_result ret;
+       int sign_byte;
+       long *reg_data;
+
+       switch (insn->opcode.bytes[1]) {
+               /* MMIO Read w/ zero-extension */
+       case 0xb6:
+               bytes = 1;
+               fallthrough;
+       case 0xb7:
+               if (!bytes)
+                       bytes = 2;
+
+               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+               if (ret)
+                       break;
+
+               /* Zero extend based on operand size */
+               reg_data = vc_insn_get_reg(ctxt);
+               if (!reg_data)
+                       return ES_DECODE_FAILED;
+
+               memset(reg_data, 0, insn->opnd_bytes);
+
+               memcpy(reg_data, ghcb->shared_buffer, bytes);
+               break;
+
+               /* MMIO Read w/ sign-extension */
+       case 0xbe:
+               bytes = 1;
+               fallthrough;
+       case 0xbf:
+               if (!bytes)
+                       bytes = 2;
+
+               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+               if (ret)
+                       break;
+
+               /* Sign extend based on operand size */
+               reg_data = vc_insn_get_reg(ctxt);
+               if (!reg_data)
+                       return ES_DECODE_FAILED;
+
+               /* Pick the fill byte from the top bit of the value read */
+               if (bytes == 1) {
+                       u8 *val = (u8 *)ghcb->shared_buffer;
+
+                       sign_byte = (*val & 0x80) ? 0xff : 0x00;
+               } else {
+                       u16 *val = (u16 *)ghcb->shared_buffer;
+
+                       sign_byte = (*val & 0x8000) ? 0xff : 0x00;
+               }
+               memset(reg_data, sign_byte, insn->opnd_bytes);
+
+               memcpy(reg_data, ghcb->shared_buffer, bytes);
+               break;
+
+       default:
+               ret = ES_UNSUPPORTED;
+       }
+
+       return ret;
+}
+
+/*
+ * The MOVS instruction has two memory operands, which raises the
+ * problem that it is not known whether the access to the source or the
+ * destination caused the #VC exception (and hence whether an MMIO read
+ * or write operation needs to be emulated).
+ *
+ * Instead of playing games with walking page-tables and trying to guess
+ * whether the source or destination is an MMIO range, split the move
+ * into two operations, a read and a write with only one memory operand.
+ * This will cause a nested #VC exception on the MMIO address which can
+ * then be handled.
+ *
+ * This implementation has the benefit that it also supports MOVS where
+ * source _and_ destination are MMIO regions.
+ *
+ * It will slow MOVS on MMIO down a lot, but in SEV-ES guests it is a
+ * rare operation. If it turns out to be a performance problem the split
+ * operations can be moved to memcpy_fromio() and memcpy_toio().
+ *
+ * One element of @bytes bytes is moved per call. Return: ES_OK when the
+ * instruction is complete, ES_RETRY when a REP prefix is present and RCX has
+ * not reached zero, ES_EXCEPTION with a #GP recorded when a segment base
+ * cannot be determined, or the error from vc_read_mem()/vc_write_mem().
+ */
+static enum es_result vc_handle_mmio_movs(struct es_em_ctxt *ctxt,
+                                         unsigned int bytes)
+{
+       unsigned long ds_base, es_base;
+       unsigned char *src, *dst;
+       unsigned char buffer[8];
+       enum es_result ret;
+       bool rep;
+       int off;
+
+       ds_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_DS);
+       es_base = insn_get_seg_base(ctxt->regs, INAT_SEG_REG_ES);
+
+       if (ds_base == -1L || es_base == -1L) {
+               ctxt->fi.vector = X86_TRAP_GP;
+               ctxt->fi.error_code = 0;
+               return ES_EXCEPTION;
+       }
+
+       /* Source is DS:RSI, destination is ES:RDI */
+       src = ds_base + (unsigned char *)ctxt->regs->si;
+       dst = es_base + (unsigned char *)ctxt->regs->di;
+
+       ret = vc_read_mem(ctxt, src, buffer, bytes);
+       if (ret != ES_OK)
+               return ret;
+
+       ret = vc_write_mem(ctxt, dst, buffer, bytes);
+       if (ret != ES_OK)
+               return ret;
+
+       /* The direction flag selects whether RSI/RDI move down or up */
+       if (ctxt->regs->flags & X86_EFLAGS_DF)
+               off = -bytes;
+       else
+               off =  bytes;
+
+       ctxt->regs->si += off;
+       ctxt->regs->di += off;
+
+       rep = insn_has_rep_prefix(&ctxt->insn);
+       if (rep)
+               ctxt->regs->cx -= 1;
+
+       if (!rep || ctxt->regs->cx == 0)
+               return ES_OK;
+       else
+               return ES_RETRY;
+}
+
+/*
+ * Emulate an MMIO access, dispatching on the first opcode byte:
+ * 0x88/0x89 register writes, 0xc6/0xc7 immediate writes, 0x8a/0x8b reads,
+ * 0xa4/0xa5 MOVS, and 0x0f two-byte opcodes. The even opcode of each pair
+ * uses a 1-byte access, the odd one uses insn->opnd_bytes.
+ *
+ * Return: ES_UNSUPPORTED for other opcodes, ES_DECODE_FAILED when the
+ * register operand cannot be determined, otherwise the result of the helper
+ * that performed the access.
+ */
+static enum es_result vc_handle_mmio(struct ghcb *ghcb,
+                                    struct es_em_ctxt *ctxt)
+{
+       struct insn *insn = &ctxt->insn;
+       unsigned int bytes = 0;
+       enum es_result ret;
+       long *reg_data;
+
+       switch (insn->opcode.bytes[0]) {
+       /* MMIO Write */
+       case 0x88:
+               bytes = 1;
+               fallthrough;
+       case 0x89:
+               if (!bytes)
+                       bytes = insn->opnd_bytes;
+
+               reg_data = vc_insn_get_reg(ctxt);
+               if (!reg_data)
+                       return ES_DECODE_FAILED;
+
+               memcpy(ghcb->shared_buffer, reg_data, bytes);
+
+               ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+               break;
+
+               /* MMIO Write of an immediate operand */
+       case 0xc6:
+               bytes = 1;
+               fallthrough;
+       case 0xc7:
+               if (!bytes)
+                       bytes = insn->opnd_bytes;
+
+               memcpy(ghcb->shared_buffer, insn->immediate1.bytes, bytes);
+
+               ret = vc_do_mmio(ghcb, ctxt, bytes, false);
+               break;
+
+               /* MMIO Read */
+       case 0x8a:
+               bytes = 1;
+               fallthrough;
+       case 0x8b:
+               if (!bytes)
+                       bytes = insn->opnd_bytes;
+
+               ret = vc_do_mmio(ghcb, ctxt, bytes, true);
+               if (ret)
+                       break;
+
+               reg_data = vc_insn_get_reg(ctxt);
+               if (!reg_data)
+                       return ES_DECODE_FAILED;
+
+               /* Zero-extend for 32-bit operation */
+               if (bytes == 4)
+                       *reg_data = 0;
+
+               memcpy(reg_data, ghcb->shared_buffer, bytes);
+               break;
+
+               /* MOVS instruction */
+       case 0xa4:
+               bytes = 1;
+               fallthrough;
+       case 0xa5:
+               if (!bytes)
+                       bytes = insn->opnd_bytes;
+
+               ret = vc_handle_mmio_movs(ctxt, bytes);
+               break;
+               /* Two-Byte Opcodes */
+       case 0x0f:
+               ret = vc_handle_mmio_twobyte_ops(ghcb, ctxt);
+               break;
+       default:
+               ret = ES_UNSUPPORTED;
+       }
+
+       return ret;
+}
+
+/*
+ * Emulate a write to DR7. The value comes from the ModRM.rm register, is
+ * sanitized, passed to the hypervisor in RAX and, once the per-CPU runtime
+ * data exists, cached there for later reads.
+ *
+ * Return: ES_OK on success, ES_DECODE_FAILED when the source register cannot
+ * be determined, ES_EXCEPTION with a #GP recorded when the upper 32 bits are
+ * set, ES_UNSUPPORTED for an early write with bits outside DR7_RESET_VALUE,
+ * or the error from sev_es_ghcb_hv_call().
+ */
+static enum es_result vc_handle_dr7_write(struct ghcb *ghcb,
+                                         struct es_em_ctxt *ctxt)
+{
+       struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+       long val, *reg = vc_insn_get_rm(ctxt);
+       enum es_result ret;
+
+       if (!reg)
+               return ES_DECODE_FAILED;
+
+       val = *reg;
+
+       /* Upper 32 bits must be written as zeroes */
+       if (val >> 32) {
+               ctxt->fi.vector = X86_TRAP_GP;
+               ctxt->fi.error_code = 0;
+               return ES_EXCEPTION;
+       }
+
+       /* Clear out other reserved bits and set bit 10 */
+       val = (val & 0xffff23ffL) | BIT(10);
+
+       /* Early non-zero writes to DR7 are not supported */
+       if (!data && (val & ~DR7_RESET_VALUE))
+               return ES_UNSUPPORTED;
+
+       /* Using a value of 0 for ExitInfo1 means RAX holds the value */
+       ghcb_set_rax(ghcb, val);
+       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+       if (ret != ES_OK)
+               return ret;
+
+       /* Remember the value so that DR7 reads can be served from the cache */
+       if (data)
+               data->dr7 = val;
+
+       return ES_OK;
+}
+
+/*
+ * Emulate a read of DR7 without involving the hypervisor: the destination
+ * (ModRM.rm) register receives the value cached in the per-CPU runtime data,
+ * or DR7_RESET_VALUE while that data does not exist yet.
+ *
+ * Return: ES_OK, or ES_DECODE_FAILED when the destination register cannot be
+ * determined.
+ */
+static enum es_result vc_handle_dr7_read(struct ghcb *ghcb,
+                                        struct es_em_ctxt *ctxt)
+{
+       struct sev_es_runtime_data *rd = this_cpu_read(runtime_data);
+       long *dest = vc_insn_get_rm(ctxt);
+
+       if (dest == NULL)
+               return ES_DECODE_FAILED;
+
+       *dest = rd ? rd->dr7 : DR7_RESET_VALUE;
+
+       return ES_OK;
+}
+
+/* Forward an intercepted WBINVD to the hypervisor and return its result. */
+static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
+                                      struct es_em_ctxt *ctxt)
+{
+       return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
+}
+
+/*
+ * Emulate RDPMC through the hypervisor: RCX is passed in, RAX and RDX are
+ * copied back into @ctxt->regs.
+ *
+ * Return: ES_OK on success, ES_VMM_ERROR when the hypervisor did not mark
+ * both RAX and RDX valid, or the error from sev_es_ghcb_hv_call().
+ */
+static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
+{
+       struct pt_regs *regs = ctxt->regs;
+       enum es_result rc;
+
+       ghcb_set_rcx(ghcb, regs->cx);
+
+       rc = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
+       if (rc != ES_OK)
+               return rc;
+
+       /* Both result registers must have been provided */
+       if (!ghcb_rax_is_valid(ghcb) || !ghcb_rdx_is_valid(ghcb))
+               return ES_VMM_ERROR;
+
+       regs->ax = ghcb->save.rax;
+       regs->dx = ghcb->save.rdx;
+
+       return ES_OK;
+}
+
+/* Handle an intercepted MONITOR: always succeeds without a hypervisor call. */
+static enum es_result vc_handle_monitor(struct ghcb *ghcb,
+                                       struct es_em_ctxt *ctxt)
+{
+       /*
+        * Treat it as a NOP and do not leak a physical address to the
+        * hypervisor.
+        */
+       return ES_OK;
+}
+
+/* Handle an intercepted MWAIT: always succeeds without a hypervisor call. */
+static enum es_result vc_handle_mwait(struct ghcb *ghcb,
+                                     struct es_em_ctxt *ctxt)
+{
+       /* Treat the same as MONITOR/MONITORX */
+       return ES_OK;
+}
+
+/*
+ * Emulate VMMCALL through the hypervisor. RAX and the CPL (3 for user mode,
+ * 0 otherwise) are always passed; the optional
+ * x86_platform.hyper.sev_es_hcall_prepare/finish hooks may set up further
+ * GHCB state before the call and process the result afterwards.
+ *
+ * Return: ES_OK on success, ES_VMM_ERROR when RAX was not marked valid or the
+ * finish hook returns false, or the error from sev_es_ghcb_hv_call().
+ */
+static enum es_result vc_handle_vmmcall(struct ghcb *ghcb,
+                                       struct es_em_ctxt *ctxt)
+{
+       enum es_result ret;
+
+       ghcb_set_rax(ghcb, ctxt->regs->ax);
+       ghcb_set_cpl(ghcb, user_mode(ctxt->regs) ? 3 : 0);
+
+       if (x86_platform.hyper.sev_es_hcall_prepare)
+               x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
+
+       ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+       if (ret != ES_OK)
+               return ret;
+
+       if (!ghcb_rax_is_valid(ghcb))
+               return ES_VMM_ERROR;
+
+       ctxt->regs->ax = ghcb->save.rax;
+
+       /*
+        * Call sev_es_hcall_finish() after regs->ax is already set.
+        * This allows the hypervisor handler to overwrite it again if
+        * necessary.
+        */
+       if (x86_platform.hyper.sev_es_hcall_finish &&
+           !x86_platform.hyper.sev_es_hcall_finish(ghcb, ctxt->regs))
+               return ES_VMM_ERROR;
+
+       return ES_OK;
+}
+
+/*
+ * Handle an intercepted #AC: record an alignment-check exception with error
+ * code 0 in @ctxt->fi and return ES_EXCEPTION so the caller forwards it.
+ */
+static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
+                                       struct es_em_ctxt *ctxt)
+{
+       /*
+        * Calling exc_alignment_check() directly does not work, because it
+        * enables IRQs and the GHCB is active. Forward the exception and call
+        * it later from vc_forward_exception().
+        */
+       ctxt->fi.vector = X86_TRAP_AC;
+       ctxt->fi.error_code = 0;
+       return ES_EXCEPTION;
+}
+
+/*
+ * Hand an intercepted #DB to the regular debug exception code:
+ * exc_debug() for kernel mode, noist_exc_debug() for user mode.
+ */
+static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
+{
+       if (!user_mode(regs)) {
+               exc_debug(regs);
+               return;
+       }
+
+       noist_exc_debug(regs);
+}
+
+/*
+ * Dispatch a #VC exception to the handler for its @exit_code.
+ *
+ * Return: the result of the selected handler, or ES_UNSUPPORTED for INVD and
+ * for exit codes without a handler.
+ */
+static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
+                                        struct ghcb *ghcb,
+                                        unsigned long exit_code)
+{
+       enum es_result result;
+
+       switch (exit_code) {
+       case SVM_EXIT_READ_DR7:
+               result = vc_handle_dr7_read(ghcb, ctxt);
+               break;
+       case SVM_EXIT_WRITE_DR7:
+               result = vc_handle_dr7_write(ghcb, ctxt);
+               break;
+       case SVM_EXIT_EXCP_BASE + X86_TRAP_AC:
+               result = vc_handle_trap_ac(ghcb, ctxt);
+               break;
+       case SVM_EXIT_RDTSC:
+       case SVM_EXIT_RDTSCP:
+               result = vc_handle_rdtsc(ghcb, ctxt, exit_code);
+               break;
+       case SVM_EXIT_RDPMC:
+               result = vc_handle_rdpmc(ghcb, ctxt);
+               break;
+       case SVM_EXIT_INVD:
+               pr_err_ratelimited("#VC exception for INVD??? Seriously???\n");
+               result = ES_UNSUPPORTED;
+               break;
+       case SVM_EXIT_CPUID:
+               result = vc_handle_cpuid(ghcb, ctxt);
+               break;
+       case SVM_EXIT_IOIO:
+               result = vc_handle_ioio(ghcb, ctxt);
+               break;
+       case SVM_EXIT_MSR:
+               result = vc_handle_msr(ghcb, ctxt);
+               break;
+       case SVM_EXIT_VMMCALL:
+               result = vc_handle_vmmcall(ghcb, ctxt);
+               break;
+       case SVM_EXIT_WBINVD:
+               result = vc_handle_wbinvd(ghcb, ctxt);
+               break;
+       case SVM_EXIT_MONITOR:
+               result = vc_handle_monitor(ghcb, ctxt);
+               break;
+       case SVM_EXIT_MWAIT:
+               result = vc_handle_mwait(ghcb, ctxt);
+               break;
+       case SVM_EXIT_NPF:
+               /* Nested page faults are handled as MMIO accesses */
+               result = vc_handle_mmio(ghcb, ctxt);
+               break;
+       default:
+               /*
+                * Unexpected #VC exception
+                */
+               result = ES_UNSUPPORTED;
+       }
+
+       return result;
+}
+
+/*
+ * Deliver the exception recorded in @ctxt->fi by calling the matching
+ * exception handler. Only #GP, #UD and #AC can be forwarded; any other
+ * vector ends in BUG().
+ */
+static __always_inline void vc_forward_exception(struct es_em_ctxt *ctxt)
+{
+       long error_code = ctxt->fi.error_code;
+       int trapnr = ctxt->fi.vector;
+
+       ctxt->regs->orig_ax = ctxt->fi.error_code;
+
+       switch (trapnr) {
+       case X86_TRAP_GP:
+               exc_general_protection(ctxt->regs, error_code);
+               break;
+       case X86_TRAP_UD:
+               exc_invalid_op(ctxt->regs);
+               break;
+       case X86_TRAP_AC:
+               exc_alignment_check(ctxt->regs, error_code);
+               break;
+       default:
+               pr_emerg("Unsupported exception in #VC instruction emulation - can't continue\n");
+               BUG();
+       }
+}
+
+/*
+ * Return true when @regs lies within this CPU's VC2 IST stack, i.e. at or
+ * above its bottom and below its top.
+ */
+static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
+{
+       unsigned long addr = (unsigned long)regs;
+
+       if (addr < __this_cpu_ist_bottom_va(VC2))
+               return false;
+
+       return addr < __this_cpu_ist_top_va(VC2);
+}
+
+/*
+ * Main #VC exception handler. It is called when the entry code was able to
+ * switch off the IST to a safe kernel stack.
+ *
+ * With the current implementation it is always possible to switch to a safe
+ * stack because #VC exceptions only happen at known places, like intercepted
+ * instructions or accesses to MMIO areas/IO ports. They can also happen with
+ * code instrumentation when the hypervisor intercepts #DB, but the critical
+ * paths are forbidden to be instrumented, so #DB exceptions currently also
+ * only happen in safe places.
+ *
+ * Flow: #DB is handed off directly. Everything else acquires the per-CPU
+ * GHCB, decodes and emulates the instruction for @error_code (the exit
+ * code), releases the GHCB and then acts on the emulation result. Failures
+ * raise SIGBUS for user mode and terminate the guest for kernel mode.
+ */
+DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+{
+       struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+       irqentry_state_t irq_state;
+       struct ghcb_state state;
+       struct es_em_ctxt ctxt;
+       enum es_result result;
+       struct ghcb *ghcb;
+
+       /*
+        * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+        */
+       if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
+               vc_handle_trap_db(regs);
+               return;
+       }
+
+       irq_state = irqentry_nmi_enter(regs);
+       lockdep_assert_irqs_disabled();
+       instrumentation_begin();
+
+       /*
+        * This is invoked through an interrupt gate, so IRQs are disabled. The
+        * code below might walk page-tables for user or kernel addresses, so
+        * keep the IRQs disabled to protect us against concurrent TLB flushes.
+        */
+
+       ghcb = sev_es_get_ghcb(&state);
+       if (!ghcb) {
+               /*
+                * Mark GHCBs inactive so that panic() is able to print the
+                * message.
+                */
+               data->ghcb_active        = false;
+               data->backup_ghcb_active = false;
+
+               panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+       }
+
+       vc_ghcb_invalidate(ghcb);
+       result = vc_init_em_ctxt(&ctxt, regs, error_code);
+
+       if (result == ES_OK)
+               result = vc_handle_exitcode(&ctxt, ghcb, error_code);
+
+       /* Release the GHCB before the result is acted upon */
+       sev_es_put_ghcb(&state);
+
+       /* Done - now check the result */
+       switch (result) {
+       case ES_OK:
+               vc_finish_insn(&ctxt);
+               break;
+       case ES_UNSUPPORTED:
+               /* This is the runtime handler, so the message must not say "early" */
+               pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
+                                  error_code, regs->ip);
+               goto fail;
+       case ES_VMM_ERROR:
+               pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+                                  error_code, regs->ip);
+               goto fail;
+       case ES_DECODE_FAILED:
+               pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+                                  error_code, regs->ip);
+               goto fail;
+       case ES_EXCEPTION:
+               vc_forward_exception(&ctxt);
+               break;
+       case ES_RETRY:
+               /* Nothing to do */
+               break;
+       default:
+               pr_emerg("Unknown result in %s():%d\n", __func__, result);
+               /*
+                * Emulating the instruction which caused the #VC exception
+                * failed - can't continue so print debug information
+                */
+               BUG();
+       }
+
+out:
+       instrumentation_end();
+       irqentry_nmi_exit(regs, irq_state);
+
+       return;
+
+fail:
+       if (user_mode(regs)) {
+               /*
+                * Do not kill the machine if user-space triggered the
+                * exception. Send SIGBUS instead and let user-space deal with
+                * it.
+                */
+               force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+       } else {
+               pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
+                        result);
+
+               /* Show some debug info */
+               show_regs(regs);
+
+               /* Ask hypervisor to sev_es_terminate */
+               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+               /* If that fails and we get here - just panic */
+               panic("Returned from Terminate-Request to Hypervisor\n");
+       }
+
+       goto out;
+}
+
+/*
+ * This handler runs on the #VC fall-back stack. It can cause further #VC
+ * exceptions. A #VC arriving here is not handled at all: the handler only
+ * panics.
+ */
+DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
+{
+       instrumentation_begin();
+       panic("Can't handle #VC exception from unsupported context\n");
+       instrumentation_end();
+}
+
+/*
+ * #VC entry point: use the safe-stack handler unless @regs is located on the
+ * #VC fall-back (VC2) stack, in which case the IST handler is called.
+ */
+DEFINE_IDTENTRY_VC(exc_vmm_communication)
+{
+       if (likely(!on_vc_fallback_stack(regs)))
+               safe_stack_exc_vmm_communication(regs, error_code);
+       else
+               ist_exc_vmm_communication(regs, error_code);
+}
+
+/*
+ * Early-boot #VC handler working on the boot GHCB. The exit code is taken
+ * from regs->orig_ax. On the first call the boot GHCB is set up; if that
+ * fails, termination is requested from the hypervisor.
+ *
+ * Return: true when the exception was handled. Emulation failures print a
+ * message, dump the registers and halt forever instead of returning.
+ */
+bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
+{
+       unsigned long exit_code = regs->orig_ax;
+       struct es_em_ctxt ctxt;
+       enum es_result result;
+
+       /* Do initial setup or terminate the guest */
+       if (unlikely(boot_ghcb == NULL && !sev_es_setup_ghcb()))
+               sev_es_terminate(GHCB_SEV_ES_REASON_GENERAL_REQUEST);
+
+       vc_ghcb_invalidate(boot_ghcb);
+
+       result = vc_init_em_ctxt(&ctxt, regs, exit_code);
+       if (result == ES_OK)
+               result = vc_handle_exitcode(&ctxt, boot_ghcb, exit_code);
+
+       /* Done - now check the result */
+       switch (result) {
+       case ES_OK:
+               vc_finish_insn(&ctxt);
+               break;
+       case ES_UNSUPPORTED:
+               early_printk("PANIC: Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+                               exit_code, regs->ip);
+               goto fail;
+       case ES_VMM_ERROR:
+               early_printk("PANIC: Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
+                               exit_code, regs->ip);
+               goto fail;
+       case ES_DECODE_FAILED:
+               early_printk("PANIC: Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
+                               exit_code, regs->ip);
+               goto fail;
+       case ES_EXCEPTION:
+               vc_early_forward_exception(&ctxt);
+               break;
+       case ES_RETRY:
+               /* Nothing to do */
+               break;
+       default:
+               BUG();
+       }
+
+       return true;
+
+fail:
+       show_regs(regs);
+
+       /* Nothing can be recovered this early - stop the CPU for good */
+       while (true)
+               halt();
+}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c

index 0ad5214..7770245 100644 (file)
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2043,7 +2043,7 @@ static bool amd_set_max_freq_ratio(void)
                 return false;
         }
  
-       highest_perf = perf_caps.highest_perf;
+       highest_perf = amd_get_highest_perf();
         nominal_perf = perf_caps.nominal_perf;
  
         if (!highest_perf || !nominal_perf) {
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c

index 19606a3..9a48f13 100644 (file)
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
                 F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                 F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
                 F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
-               F(SGX_LC)
+               F(SGX_LC) | F(BUS_LOCK_DETECT)
         );
         /* Set LA57 based on hardware capability. */
         if (cpuid_ecx(7) & F(LA57))
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
                 F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
                 F(PMM) | F(PMM_EN)
         );
+
+       /*
+        * Hide RDTSCP and RDPID if either feature is reported as supported but
+        * probing MSR_TSC_AUX failed.  This is purely a sanity check and
+        * should never happen, but the guest will likely crash if RDTSCP or
+        * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+        * the past.  For example, the sanity check may fire if this instance of
+        * KVM is running as L1 on top of an older, broken KVM.
+        */
+       if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+                    kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+                    !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+               kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+               kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+       }
  }
  EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
  
@@ -637,7 +652,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
         case 7:
                 entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                 entry->eax = 0;
-               entry->ecx = F(RDPID);
+               if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+                       entry->ecx = F(RDPID);
                 ++array->nent;
         default:
                 break;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c

index 77e1c89..8a0ccdb 100644 (file)
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
   * from the register case of group9.
   */
  static const struct gprefix pfx_0f_c7_7 = {
-       N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+       N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
  };
  
  
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h

index 0d35911..f016838 100644 (file)
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -468,6 +468,7 @@ enum x86_intercept {
         x86_intercept_clgi,
         x86_intercept_skinit,
         x86_intercept_rdtscp,
+       x86_intercept_rdpid,
         x86_intercept_icebp,
         x86_intercept_wbinvd,
         x86_intercept_monitor,
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c

index 152591f..c0ebef5 100644 (file)
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
         if (!apic->lapic_timer.hv_timer_in_use)
                 goto out;
         WARN_ON(rcuwait_active(&vcpu->wait));
-       cancel_hv_timer(apic);
         apic_timer_expired(apic, false);
+       cancel_hv_timer(apic);
  
         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
                 advance_periodic_target_expiration(apic);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c

index 4b3ee24..0144c40 100644 (file)
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
         if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
                 pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
  
-               if (WARN_ON_ONCE(!mmu->lm_root)) {
+               if (WARN_ON_ONCE(!mmu->pml4_root)) {
                         r = -EIO;
                         goto out_unlock;
                 }
  
-               mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+               mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
         }
  
         for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
         }
  
         if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-               mmu->root_hpa = __pa(mmu->lm_root);
+               mmu->root_hpa = __pa(mmu->pml4_root);
         else
                 mmu->root_hpa = __pa(mmu->pae_root);
  
@@ -3350,7 +3350,7 @@ out_unlock:
  static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
  {
         struct kvm_mmu *mmu = vcpu->arch.mmu;
-       u64 *lm_root, *pae_root;
+       u64 *pml4_root, *pae_root;
  
         /*
          * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
         if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
                 return -EIO;
  
-       if (mmu->pae_root && mmu->lm_root)
+       if (mmu->pae_root && mmu->pml4_root)
                 return 0;
  
         /*
          * The special roots should always be allocated in concert.  Yell and
          * bail if KVM ends up in a state where only one of the roots is valid.
          */
-       if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+       if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
                 return -EIO;
  
         /*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
         if (!pae_root)
                 return -ENOMEM;
  
-       lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-       if (!lm_root) {
+       pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+       if (!pml4_root) {
                 free_page((unsigned long)pae_root);
                 return -ENOMEM;
         }
  
         mmu->pae_root = pae_root;
-       mmu->lm_root = lm_root;
+       mmu->pml4_root = pml4_root;
  
         return 0;
  }
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
         if (!tdp_enabled && mmu->pae_root)
                 set_memory_encrypted((unsigned long)mmu->pae_root, 1);
         free_page((unsigned long)mmu->pae_root);
-       free_page((unsigned long)mmu->lm_root);
+       free_page((unsigned long)mmu->pml4_root);
  }
  
  static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c

index 88f69a6..95eeb5a 100644 (file)
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
  }
  
  /**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
   * @kvm: kvm instance
   * @as_id: the address space of the paging structure the SPTE was a part of
   * @gfn: the base GFN that was mapped by the SPTE
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
  
         trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
  
+       if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+               if (is_large_pte(old_spte))
+                       atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+               else
+                       atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+       }
+
         /*
          * The only times a SPTE should be changed from a non-present to
          * non-present state is when an MMIO entry is installed/modified/
@@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
                 }
  
                 if (!is_shadow_present_pte(iter.old_spte)) {
+                       /*
+                        * If SPTE has been frozen by another thread, just
+                        * give up and retry, avoiding unnecessary page table
+                        * allocation and free.
+                        */
+                       if (is_removed_spte(iter.old_spte))
+                               break;
+
                         sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
                         child_pt = sp->spt;
  
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c

index 540d43b..5e8d844 100644 (file)
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
         nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
  
         svm_switch_vmcb(svm, &svm->vmcb01);
-       WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
  
         /*
          * On vmexit the  GIF is set to false and
@@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm)
         __free_page(virt_to_page(svm->nested.vmcb02.ptr));
         svm->nested.vmcb02.ptr = NULL;
  
+       /*
+        * When last_vmcb12_gpa matches the current vmcb12 gpa,
+        * some vmcb12 fields are not loaded if they are marked clean
+        * in the vmcb12, since in this case they are up to date already.
+        *
+        * When the vmcb02 is freed, this optimization becomes invalid.
+        */
+       svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
         svm->nested.initialized = false;
  }
  
@@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm)
  
         if (is_guest_mode(vcpu)) {
                 svm->nested.nested_run_pending = 0;
+               svm->nested.vmcb12_gpa = INVALID_GPA;
+
                 leave_guest_mode(vcpu);
  
-               svm_switch_vmcb(svm, &svm->nested.vmcb02);
+               svm_switch_vmcb(svm, &svm->vmcb01);
  
                 nested_svm_uninit_mmu_context(vcpu);
                 vmcb_mark_all_dirty(svm->vmcb);
@@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
          * L2 registers if needed are moved from the current VMCB to VMCB02.
          */
  
+       if (is_guest_mode(vcpu))
+               svm_leave_nested(svm);
+       else
+               svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+       svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
         svm->nested.nested_run_pending =
                 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
  
         svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
-       if (svm->current_vmcb == &svm->vmcb01)
-               svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
  
         svm->vmcb01.ptr->save.es = save->es;
         svm->vmcb01.ptr->save.cs = save->cs;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c

index 1356ee0..5bc887e 100644 (file)
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
  }
  
  static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
-                                 unsigned long __user dst_uaddr,
+                                 void __user *dst_uaddr,
                                   unsigned long dst_paddr,
                                   int size, int *err)
  {
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
  
         if (tpage) {
                 offset = paddr & 15;
-               if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
-                                page_address(tpage) + offset, size))
+               if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
                         ret = -EFAULT;
         }
  
@@ -800,9 +799,9 @@ e_free:
  }
  
  static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
-                                 unsigned long __user vaddr,
+                                 void __user *vaddr,
                                   unsigned long dst_paddr,
-                                 unsigned long __user dst_vaddr,
+                                 void __user *dst_vaddr,
                                   int size, int *error)
  {
         struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
         int ret, len = size;
  
         /* If source buffer is not aligned then use an intermediate buffer */
-       if (!IS_ALIGNED(vaddr, 16)) {
+       if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
                 src_tpage = alloc_page(GFP_KERNEL);
                 if (!src_tpage)
                         return -ENOMEM;
  
-               if (copy_from_user(page_address(src_tpage),
-                               (void __user *)(uintptr_t)vaddr, size)) {
+               if (copy_from_user(page_address(src_tpage), vaddr, size)) {
                         __free_page(src_tpage);
                         return -EFAULT;
                 }
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
          *   - copy the source buffer in an intermediate buffer
          *   - use the intermediate buffer as source buffer
          */
-       if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+       if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
                 int dst_offset;
  
                 dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
                                page_address(src_tpage), size);
                 else {
                         if (copy_from_user(page_address(dst_tpage) + dst_offset,
-                                          (void __user *)(uintptr_t)vaddr, size)) {
+                                          vaddr, size)) {
                                 ret = -EFAULT;
                                 goto e_free;
                         }
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
                 if (dec)
                         ret = __sev_dbg_decrypt_user(kvm,
                                                      __sme_page_pa(src_p[0]) + s_off,
-                                                    dst_vaddr,
+                                                    (void __user *)dst_vaddr,
                                                      __sme_page_pa(dst_p[0]) + d_off,
                                                      len, &argp->error);
                 else
                         ret = __sev_dbg_encrypt_user(kvm,
                                                      __sme_page_pa(src_p[0]) + s_off,
-                                                    vaddr,
+                                                    (void __user *)vaddr,
                                                      __sme_page_pa(dst_p[0]) + d_off,
-                                                    dst_vaddr,
+                                                    (void __user *)dst_vaddr,
                                                      len, &argp->error);
  
                 sev_unpin_memory(kvm, src_p, n);
@@ -1764,7 +1762,8 @@ e_mirror_unlock:
  e_source_unlock:
         mutex_unlock(&source_kvm->lock);
  e_source_put:
-       fput(source_kvm_file);
+       if (source_kvm_file)
+               fput(source_kvm_file);
         return ret;
  }
  
@@ -2198,7 +2197,7 @@ vmgexit_err:
         return -EINVAL;
  }
  
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
  {
         if (!svm->ghcb)
                 return;
@@ -2234,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
         struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
         int asid = sev_get_asid(svm->vcpu.kvm);
  
-       /* Perform any SEV-ES pre-run actions */
-       pre_sev_es_run(svm);
-
         /* Assign the asid allocated with this SEV guest */
         svm->asid = asid;
  
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c

index b649f92..05eca13 100644 (file)
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
   * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
   * defer the restoration of TSC_AUX until the CPU returns to userspace.
   */
-#define TSC_AUX_URET_SLOT      0
+static int tsc_aux_uret_slot __read_mostly = -1;
  
  static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
  
@@ -447,6 +447,11 @@ static int has_svm(void)
                 return 0;
         }
  
+       if (pgtable_l5_enabled()) {
+               pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+               return 0;
+       }
+
         return 1;
  }
  
@@ -858,8 +863,8 @@ static __init void svm_adjust_mmio_mask(void)
                 return;
  
         /* If memory encryption is not enabled, use existing mask */
-       rdmsrl(MSR_K8_SYSCFG, msr);
-       if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+       rdmsrl(MSR_AMD64_SYSCFG, msr);
+       if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
                 return;
  
         enc_bit = cpuid_ebx(0x8000001f) & 0x3f;
@@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void)
                 kvm_tsc_scaling_ratio_frac_bits = 32;
         }
  
-       if (boot_cpu_has(X86_FEATURE_RDTSCP))
-               kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+       tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
  
         /* Check for pause filtering support */
         if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1100,7 +1104,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
         return svm->vmcb->control.tsc_offset;
  }
  
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+                                             struct vcpu_svm *svm)
  {
         /*
          * Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1119,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
                 else
                         svm_clr_intercept(svm, INTERCEPT_INVPCID);
         }
+
+       if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+               if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+                       svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+               else
+                       svm_set_intercept(svm, INTERCEPT_RDTSCP);
+       }
  }
  
  static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1235,8 +1248,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
         svm->current_vmcb->asid_generation = 0;
         svm->asid = 0;
  
-       svm->nested.vmcb12_gpa = 0;
-       svm->nested.last_vmcb12_gpa = 0;
+       svm->nested.vmcb12_gpa = INVALID_GPA;
+       svm->nested.last_vmcb12_gpa = INVALID_GPA;
         vcpu->arch.hflags = 0;
  
         if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1261,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
                 svm_clr_intercept(svm, INTERCEPT_PAUSE);
         }
  
-       svm_check_invpcid(svm);
+       svm_recalc_instruction_intercepts(vcpu, svm);
  
         /*
          * If the host supports V_SPEC_CTRL then disable the interception
@@ -1424,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
         struct vcpu_svm *svm = to_svm(vcpu);
         struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
  
+       if (sev_es_guest(vcpu->kvm))
+               sev_es_unmap_ghcb(svm);
+
         if (svm->guest_state_loaded)
                 return;
  
@@ -1445,8 +1461,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
                 }
         }
  
-       if (static_cpu_has(X86_FEATURE_RDTSCP))
-               kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+       if (likely(tsc_aux_uret_slot >= 0))
+               kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
  
         svm->guest_state_loaded = true;
  }
@@ -2655,11 +2671,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                         msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
                 break;
         case MSR_TSC_AUX:
-               if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-                       return 1;
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
                 msr_info->data = svm->tsc_aux;
                 break;
         /*
@@ -2876,30 +2887,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                 svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
                 break;
         case MSR_TSC_AUX:
-               if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-                       return 1;
-
-               if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-
-               /*
-                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
-                * incomplete and conflicting architectural behavior.  Current
-                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
-                * reserved and always read as zeros.  Emulate AMD CPU behavior
-                * to avoid explosions if the vCPU is migrated from an AMD host
-                * to an Intel host.
-                */
-               data = (u32)data;
-
                 /*
                  * TSC_AUX is usually changed only during boot and never read
                  * directly.  Intercept TSC_AUX instead of exposing it to the
                  * guest via direct_access_msrs, and switch it via user return.
                  */
                 preempt_disable();
-               r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+               r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
                 preempt_enable();
                 if (r)
                         return 1;
@@ -3084,6 +3078,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
         [SVM_EXIT_STGI]                         = stgi_interception,
         [SVM_EXIT_CLGI]                         = clgi_interception,
         [SVM_EXIT_SKINIT]                       = skinit_interception,
+       [SVM_EXIT_RDTSCP]                       = kvm_handle_invalid_op,
         [SVM_EXIT_WBINVD]                       = kvm_emulate_wbinvd,
         [SVM_EXIT_MONITOR]                      = kvm_emulate_monitor,
         [SVM_EXIT_MWAIT]                        = kvm_emulate_mwait,
@@ -3972,8 +3967,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
         svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
                              guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
  
-       /* Check again if INVPCID interception if required */
-       svm_check_invpcid(svm);
+       svm_recalc_instruction_intercepts(vcpu, svm);
  
         /* For sev guests, the memory encryption bit is not reserved in CR3.  */
         if (sev_guest(vcpu->kvm)) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h

index 84b3133..2c9ece6 100644 (file)
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -20,6 +20,7 @@
  #include <linux/bits.h>
  
  #include <asm/svm.h>
+#include <asm/sev-common.h>
  
  #define __sme_page_pa(x) __sme_set(page_to_pfn(x) << PAGE_SHIFT)
  
@@ -525,40 +526,9 @@ void svm_vcpu_unblocking(struct kvm_vcpu *vcpu);
  
  /* sev.c */
  
-#define GHCB_VERSION_MAX               1ULL
-#define GHCB_VERSION_MIN               1ULL
-
-#define GHCB_MSR_INFO_POS              0
-#define GHCB_MSR_INFO_MASK             (BIT_ULL(12) - 1)
-
-#define GHCB_MSR_SEV_INFO_RESP         0x001
-#define GHCB_MSR_SEV_INFO_REQ          0x002
-#define GHCB_MSR_VER_MAX_POS           48
-#define GHCB_MSR_VER_MAX_MASK          0xffff
-#define GHCB_MSR_VER_MIN_POS           32
-#define GHCB_MSR_VER_MIN_MASK          0xffff
-#define GHCB_MSR_CBIT_POS              24
-#define GHCB_MSR_CBIT_MASK             0xff
-#define GHCB_MSR_SEV_INFO(_max, _min, _cbit)                           \
-       ((((_max) & GHCB_MSR_VER_MAX_MASK) << GHCB_MSR_VER_MAX_POS) |   \
-        (((_min) & GHCB_MSR_VER_MIN_MASK) << GHCB_MSR_VER_MIN_POS) |   \
-        (((_cbit) & GHCB_MSR_CBIT_MASK) << GHCB_MSR_CBIT_POS) |        \
-        GHCB_MSR_SEV_INFO_RESP)
-
-#define GHCB_MSR_CPUID_REQ             0x004
-#define GHCB_MSR_CPUID_RESP            0x005
-#define GHCB_MSR_CPUID_FUNC_POS                32
-#define GHCB_MSR_CPUID_FUNC_MASK       0xffffffff
-#define GHCB_MSR_CPUID_VALUE_POS       32
-#define GHCB_MSR_CPUID_VALUE_MASK      0xffffffff
-#define GHCB_MSR_CPUID_REG_POS         30
-#define GHCB_MSR_CPUID_REG_MASK                0x3
-
-#define GHCB_MSR_TERM_REQ              0x100
-#define GHCB_MSR_TERM_REASON_SET_POS   12
-#define GHCB_MSR_TERM_REASON_SET_MASK  0xf
-#define GHCB_MSR_TERM_REASON_POS       16
-#define GHCB_MSR_TERM_REASON_MASK      0xff
+#define GHCB_VERSION_MAX       1ULL
+#define GHCB_VERSION_MIN       1ULL
+
  
  extern unsigned int max_sev_asid;
  
@@ -581,6 +551,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
  void sev_es_create_vcpu(struct vcpu_svm *svm);
  void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
  void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
  
  /* vmenter.S */
  
diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h

index d1d7798..8dee8a5 100644 (file)
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void)
  {
         u64 debugctl = 0;
  
+       if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+               debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
         if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
                 debugctl |= DEBUGCTLMSR_LBR_MASK;
  
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c

index bced766..6058a65 100644 (file)
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
                         nested_vmx_handle_enlightened_vmptrld(vcpu, false);
  
                 if (evmptrld_status == EVMPTRLD_VMFAIL ||
-                   evmptrld_status == EVMPTRLD_ERROR) {
-                       pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
-                                            __func__);
-                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-                       vcpu->run->internal.suberror =
-                               KVM_INTERNAL_ERROR_EMULATION;
-                       vcpu->run->internal.ndata = 0;
+                   evmptrld_status == EVMPTRLD_ERROR)
                         return false;
-               }
         }
  
         return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
  
  static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
  {
-       if (!nested_get_evmcs_page(vcpu))
+       if (!nested_get_evmcs_page(vcpu)) {
+               pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+                                    __func__);
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror =
+                       KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+
                 return false;
+       }
  
         if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
                 return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
         /* Similarly, triple faults in L2 should never escape. */
         WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
  
-       kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+       if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+               /*
+                * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+                * Enlightened VMCS after migration and we still need to
+                * do that when something is forcing L2->L1 exit prior to
+                * the first L2 run.
+                */
+               (void)nested_get_evmcs_page(vcpu);
+       }
  
         /* Service the TLB flush request for L2 before switching to L1. */
         if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c

index d000cdd..4bceb5c 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -455,21 +455,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
  
  static unsigned long host_idt_base;
  
-/*
- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
- * will emulate SYSCALL in legacy mode if the vendor string in guest
- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, IA32_STAR must always be included in
- * vmx_uret_msrs_list[], even in i386 builds.
- */
-static const u32 vmx_uret_msrs_list[] = {
-#ifdef CONFIG_X86_64
-       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
-       MSR_EFER, MSR_TSC_AUX, MSR_STAR,
-       MSR_IA32_TSX_CTRL,
-};
-
  #if IS_ENABLED(CONFIG_HYPERV)
  static bool __read_mostly enlightened_vmcs = true;
  module_param(enlightened_vmcs, bool, 0444);
@@ -697,21 +682,11 @@ static bool is_valid_passthrough_msr(u32 msr)
         return r;
  }
  
-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
-{
-       int i;
-
-       for (i = 0; i < vmx->nr_uret_msrs; ++i)
-               if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
-                       return i;
-       return -1;
-}
-
  struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
  {
         int i;
  
-       i = __vmx_find_uret_msr(vmx, msr);
+       i = kvm_find_user_return_msr(msr);
         if (i >= 0)
                 return &vmx->guest_uret_msrs[i];
         return NULL;
@@ -720,13 +695,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
  static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
                                   struct vmx_uret_msr *msr, u64 data)
  {
+       unsigned int slot = msr - vmx->guest_uret_msrs;
         int ret = 0;
  
         u64 old_msr_data = msr->data;
         msr->data = data;
-       if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+       if (msr->load_into_hardware) {
                 preempt_disable();
-               ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+               ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
                 preempt_enable();
                 if (ret)
                         msr->data = old_msr_data;
@@ -1078,7 +1054,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
                 return false;
         }
  
-       i = __vmx_find_uret_msr(vmx, MSR_EFER);
+       i = kvm_find_user_return_msr(MSR_EFER);
         if (i < 0)
                 return false;
  
@@ -1240,11 +1216,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
          */
         if (!vmx->guest_uret_msrs_loaded) {
                 vmx->guest_uret_msrs_loaded = true;
-               for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
-                       kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+               for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+                       if (!vmx->guest_uret_msrs[i].load_into_hardware)
+                               continue;
+
+                       kvm_set_user_return_msr(i,
                                                 vmx->guest_uret_msrs[i].data,
                                                 vmx->guest_uret_msrs[i].mask);
-
+               }
         }
  
         if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1751,19 +1730,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
         vmx_clear_hlt(vcpu);
  }
  
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+                              bool load_into_hardware)
  {
-       struct vmx_uret_msr tmp;
-       int from, to;
+       struct vmx_uret_msr *uret_msr;
  
-       from = __vmx_find_uret_msr(vmx, msr);
-       if (from < 0)
+       uret_msr = vmx_find_uret_msr(vmx, msr);
+       if (!uret_msr)
                 return;
-       to = vmx->nr_active_uret_msrs++;
  
-       tmp = vmx->guest_uret_msrs[to];
-       vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
-       vmx->guest_uret_msrs[from] = tmp;
+       uret_msr->load_into_hardware = load_into_hardware;
  }
  
  /*
@@ -1773,29 +1749,42 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
   */
  static void setup_msrs(struct vcpu_vmx *vmx)
  {
-       vmx->guest_uret_msrs_loaded = false;
-       vmx->nr_active_uret_msrs = 0;
  #ifdef CONFIG_X86_64
+       bool load_syscall_msrs;
+
         /*
          * The SYSCALL MSRs are only needed on long mode guests, and only
          * when EFER.SCE is set.
          */
-       if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
-               vmx_setup_uret_msr(vmx, MSR_STAR);
-               vmx_setup_uret_msr(vmx, MSR_LSTAR);
-               vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
-       }
+       load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+                           (vmx->vcpu.arch.efer & EFER_SCE);
+
+       vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+       vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+       vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
  #endif
-       if (update_transition_efer(vmx))
-               vmx_setup_uret_msr(vmx, MSR_EFER);
+       vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
  
-       if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
-               vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+       vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+                          guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+                          guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
  
-       vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+       /*
+        * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+        * kernel and old userspace.  If those guests run on a tsx=off host, do
+        * allow guests to use TSX_CTRL, but don't change the value in hardware
+        * so that TSX remains always disabled.
+        */
+       vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
  
         if (cpu_has_vmx_msr_bitmap())
                 vmx_update_msr_bitmap(&vmx->vcpu);
+
+       /*
+        * The set of MSRs to load may have changed, reload MSRs before the
+        * next VM-Enter.
+        */
+       vmx->guest_uret_msrs_loaded = false;
  }
  
  static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1993,11 +1982,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 else
                         msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
                 break;
-       case MSR_TSC_AUX:
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-               goto find_uret_msr;
         case MSR_IA32_DEBUGCTLMSR:
                 msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
                 break;
@@ -2031,6 +2015,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
         if (!intel_pmu_lbr_is_enabled(vcpu))
                 debugctl &= ~DEBUGCTLMSR_LBR_MASK;
  
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+               debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
         return debugctl;
  }
  
@@ -2313,14 +2300,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                 else
                         vmx->pt_desc.guest.addr_a[index / 2] = data;
                 break;
-       case MSR_TSC_AUX:
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-               /* Check reserved bit, higher 32 bits should be zero */
-               if ((data >> 32) != 0)
-                       return 1;
-               goto find_uret_msr;
         case MSR_IA32_PERF_CAPABILITIES:
                 if (data && !vcpu_to_pmu(vcpu)->version)
                         return 1;
@@ -4369,7 +4348,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
                                                   xsaves_enabled, false);
         }
  
-       vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+       /*
+        * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+        * feature is exposed to the guest.  This creates a virtualization hole
+        * if both are supported in hardware but only one is exposed to the
+        * guest, but letting the guest execute RDTSCP or RDPID when either one
+        * is advertised is preferable to emulating the advertised instruction
+        * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+        */
+       if (cpu_has_vmx_rdtscp()) {
+               bool rdpid_or_rdtscp_enabled =
+                       guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+                       guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+               vmx_adjust_secondary_exec_control(vmx, &exec_control,
+                                                 SECONDARY_EXEC_ENABLE_RDTSCP,
+                                                 rdpid_or_rdtscp_enabled, false);
+       }
         vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
  
         vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -6855,6 +6850,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
  
  static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
  {
+       struct vmx_uret_msr *tsx_ctrl;
         struct vcpu_vmx *vmx;
         int i, cpu, err;
  
@@ -6877,43 +6873,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                         goto free_vpid;
         }
  
-       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-               u32 index = vmx_uret_msrs_list[i];
-               u32 data_low, data_high;
-               int j = vmx->nr_uret_msrs;
-
-               if (rdmsr_safe(index, &data_low, &data_high) < 0)
-                       continue;
-               if (wrmsr_safe(index, data_low, data_high) < 0)
-                       continue;
-
-               vmx->guest_uret_msrs[j].slot = i;
-               vmx->guest_uret_msrs[j].data = 0;
-               switch (index) {
-               case MSR_IA32_TSX_CTRL:
-                       /*
-                        * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
-                        * interception.  Keep the host value unchanged to avoid
-                        * changing CPUID bits under the host kernel's feet.
-                        *
-                        * hle=0, rtm=0, tsx_ctrl=1 can be found with some
-                        * combinations of new kernel and old userspace.  If
-                        * those guests run on a tsx=off host, do allow guests
-                        * to use TSX_CTRL, but do not change the value on the
-                        * host so that TSX remains always disabled.
-                        */
-                       if (boot_cpu_has(X86_FEATURE_RTM))
-                               vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-                       else
-                               vmx->guest_uret_msrs[j].mask = 0;
-                       break;
-               default:
-                       vmx->guest_uret_msrs[j].mask = -1ull;
-                       break;
-               }
-               ++vmx->nr_uret_msrs;
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               vmx->guest_uret_msrs[i].data = 0;
+               vmx->guest_uret_msrs[i].mask = -1ull;
+       }
+       if (boot_cpu_has(X86_FEATURE_RTM)) {
+               /*
+                * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+                * Keep the host value unchanged to avoid changing CPUID bits
+                * under the host kernel's feet.
+                */
+               tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+               if (tsx_ctrl)
+                       tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
         }
  
         err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7344,9 +7316,11 @@ static __init void vmx_set_cpu_caps(void)
         if (!cpu_has_vmx_xsaves())
                 kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
  
-       /* CPUID 0x80000001 */
-       if (!cpu_has_vmx_rdtscp())
+       /* CPUID 0x80000001 and 0x7 (RDPID) */
+       if (!cpu_has_vmx_rdtscp()) {
                 kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+               kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+       }
  
         if (cpu_has_vmx_waitpkg())
                 kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
@@ -7402,8 +7376,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
         /*
          * RDPID causes #UD if disabled through secondary execution controls.
          * Because it is marked as EmulateOnUD, we need to intercept it here.
+        * Note, RDPID is hidden behind ENABLE_RDTSCP.
          */
-       case x86_intercept_rdtscp:
+       case x86_intercept_rdpid:
                 if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
                         exception->vector = UD_VECTOR;
                         exception->error_code_valid = false;
@@ -7769,17 +7744,42 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
         .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
  };
  
+static __init void vmx_setup_user_return_msrs(void)
+{
+
+       /*
+        * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+        * will emulate SYSCALL in legacy mode if the vendor string in guest
+        * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
+        * support this emulation, MSR_STAR is included in the list for i386,
+        * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+        * into hardware and is here purely for emulation purposes.
+        */
+       const u32 vmx_uret_msrs_list[] = {
+       #ifdef CONFIG_X86_64
+               MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+       #endif
+               MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+               MSR_IA32_TSX_CTRL,
+       };
+       int i;
+
+       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+               kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
+}
+
  static __init int hardware_setup(void)
  {
         unsigned long host_bndcfgs;
         struct desc_ptr dt;
-       int r, i, ept_lpage_level;
+       int r, ept_lpage_level;
  
         store_idt(&dt);
         host_idt_base = dt.address;
  
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-               kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+       vmx_setup_user_return_msrs();
  
         if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
                 return -EIO;
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h

index 008cb87..16e4e45 100644 (file)
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -36,7 +36,7 @@ struct vmx_msrs {
  };
  
  struct vmx_uret_msr {
-       unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+       bool load_into_hardware;
         u64 data;
         u64 mask;
  };
@@ -245,8 +245,16 @@ struct vcpu_vmx {
         u32                   idt_vectoring_info;
         ulong                 rflags;
  
+       /*
+        * User return MSRs are always emulated when enabled in the guest, but
+        * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+        * of 64-bit mode or if EFER.SCE=1, thus the SYSCALL MSRs don't need to
+        * be loaded into hardware if those conditions aren't met.
+        * Each entry's load_into_hardware flag records whether that MSR must
+        * be loaded into hardware when running the guest.  guest_uret_msrs[]
+        * is indexed by the MSR's slot in KVM's common user return MSR list.
+        */
         struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-       int                   nr_uret_msrs;
         int                   nr_active_uret_msrs;
         bool                  guest_uret_msrs_loaded;
  #ifdef CONFIG_X86_64
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c

index 6eda283..bbc4e04 100644 (file)
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
   */
  #define KVM_MAX_NR_USER_RETURN_MSRS 16
  
-struct kvm_user_return_msrs_global {
-       int nr;
-       u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
  struct kvm_user_return_msrs {
         struct user_return_notifier urn;
         bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
         } values[KVM_MAX_NR_USER_RETURN_MSRS];
  };
  
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
  static struct kvm_user_return_msrs __percpu *user_return_msrs;
  
  #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,23 +327,53 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
                 user_return_notifier_unregister(urn);
         }
         local_irq_restore(flags);
-       for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+       for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
                 values = &msrs->values[slot];
                 if (values->host != values->curr) {
-                       wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+                       wrmsrl(kvm_uret_msrs_list[slot], values->host);
                         values->curr = values->host;
                 }
         }
  }
  
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
+{
+       u64 val;
+       int ret;
+
+       preempt_disable();
+       ret = rdmsrl_safe(msr, &val);
+       if (ret)
+               goto out;
+       ret = wrmsrl_safe(msr, val);
+out:
+       preempt_enable();
+       return ret;
+}
+
+int kvm_add_user_return_msr(u32 msr)
  {
-       BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-       user_return_msrs_global.msrs[slot] = msr;
-       if (slot >= user_return_msrs_global.nr)
-               user_return_msrs_global.nr = slot + 1;
+       BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+       if (kvm_probe_user_return_msr(msr))
+               return -1;
+
+       kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+       return kvm_nr_uret_msrs++;
  }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+
+int kvm_find_user_return_msr(u32 msr)
+{
+       int i;
+
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               if (kvm_uret_msrs_list[i] == msr)
+                       return i;
+       }
+       return -1;
+}
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
  
  static void kvm_user_return_msr_cpu_online(void)
  {
@@ -355,8 +382,8 @@ static void kvm_user_return_msr_cpu_online(void)
         u64 value;
         int i;
  
-       for (i = 0; i < user_return_msrs_global.nr; ++i) {
-               rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               rdmsrl_safe(kvm_uret_msrs_list[i], &value);
                 msrs->values[i].host = value;
                 msrs->values[i].curr = value;
         }
@@ -371,7 +398,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
         value = (value & mask) | (msrs->values[slot].host & ~mask);
         if (value == msrs->values[slot].curr)
                 return 0;
-       err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+       err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
         if (err)
                 return 1;
  
@@ -1149,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
  
         if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
                 fixed |= DR6_RTM;
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+               fixed |= DR6_BUS_LOCK;
         return fixed;
  }
  
@@ -1615,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
                  * invokes 64-bit SYSENTER.
                  */
                 data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+               break;
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+
+               /*
+                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+                * incomplete and conflicting architectural behavior.  Current
+                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+                * reserved and always read as zeros.  Enforce Intel's reserved
+                * bits check if and only if the guest CPU is Intel, and clear
+                * the bits in all other cases.  This ensures cross-vendor
+                * migration will provide consistent behavior for the guest.
+                */
+               if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+                       return 1;
+
+               data = (u32)data;
+               break;
         }
  
         msr.data = data;
@@ -1651,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
         if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
                 return KVM_MSR_RET_FILTERED;
  
+       switch (index) {
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+               break;
+       }
+
         msr.index = index;
         msr.host_initiated = host_initiated;
  
@@ -3402,7 +3468,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
         case MSR_IA32_LASTBRANCHTOIP:
         case MSR_IA32_LASTINTFROMIP:
         case MSR_IA32_LASTINTTOIP:
-       case MSR_K8_SYSCFG:
+       case MSR_AMD64_SYSCFG:
         case MSR_K8_TSEG_ADDR:
         case MSR_K8_TSEG_MASK:
         case MSR_VM_HSAVE_PA:
@@ -5468,14 +5534,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
  static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
                               struct kvm_msr_filter_range *user_range)
  {
-       struct msr_bitmap_range range;
         unsigned long *bitmap = NULL;
         size_t bitmap_size;
-       int r;
  
         if (!user_range->nmsrs)
                 return 0;
  
+       if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+               return -EINVAL;
+
+       if (!user_range->flags)
+               return -EINVAL;
+
         bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
         if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
                 return -EINVAL;
@@ -5484,31 +5554,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
         if (IS_ERR(bitmap))
                 return PTR_ERR(bitmap);
  
-       range = (struct msr_bitmap_range) {
+       msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
                 .flags = user_range->flags,
                 .base = user_range->base,
                 .nmsrs = user_range->nmsrs,
                 .bitmap = bitmap,
         };
  
-       if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
-               r = -EINVAL;
-               goto err;
-       }
-
-       if (!range.flags) {
-               r = -EINVAL;
-               goto err;
-       }
-
-       /* Everything ok, add this range identifier. */
-       msr_filter->ranges[msr_filter->count] = range;
         msr_filter->count++;
-
         return 0;
-err:
-       kfree(bitmap);
-       return r;
  }
  
  static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
@@ -5937,7 +5991,8 @@ static void kvm_init_msr_list(void)
                                 continue;
                         break;
                 case MSR_TSC_AUX:
-                       if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+                       if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+                           !kvm_cpu_cap_has(X86_FEATURE_RDPID))
                                 continue;
                         break;
                 case MSR_IA32_UMWAIT_CONTROL:
@@ -8039,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
  
  static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
  
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+       queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
  /*
   * Notification about pvclock gtod data update.
   */
@@ -8050,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
  
         update_pvclock_gtod(tk);
  
-       /* disable master clock if host does not trust, or does not
-        * use, TSC based clocksource.
+       /*
+        * Disable master clock if host does not trust, or does not use,
+        * TSC based clocksource. Delegate queue_work() to irq_work as
+        * this is invoked with tk_core.seq write held.
          */
         if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
             atomic_read(&kvm_guest_has_master_clock) != 0)
-               queue_work(system_long_wq, &pvclock_gtod_work);
-
+               irq_work_queue(&pvclock_irq_work);
         return 0;
  }
  
@@ -8118,6 +8186,7 @@ int kvm_arch_init(void *opaque)
                 printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
                 goto out_free_x86_emulator_cache;
         }
+       kvm_nr_uret_msrs = 0;
  
         r = kvm_mmu_module_init();
         if (r)
@@ -8168,6 +8237,8 @@ void kvm_arch_exit(void)
         cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
  #ifdef CONFIG_X86_64
         pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+       irq_work_sync(&pvclock_irq_work);
+       cancel_work_sync(&pvclock_gtod_work);
  #endif
         kvm_x86_ops.hardware_enable = NULL;
         kvm_mmu_module_exit();
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c

index b93d6cd..121921b 100644 (file)
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,7 +5,7 @@
  #include <xen/xen.h>
  
  #include <asm/fpu/internal.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
  #include <asm/traps.h>
  #include <asm/kdebug.h>
  
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c

index 04aba7e..a9639f6 100644 (file)
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -529,7 +529,7 @@ void __init sme_enable(struct boot_params *bp)
                 /*
                  * No SME if Hypervisor bit is set. This check is here to
                  * prevent a guest from trying to enable SME. For running as a
-                * KVM guest the MSR_K8_SYSCFG will be sufficient, but there
+                * KVM guest the MSR_AMD64_SYSCFG will be sufficient, but there
                  * might be other hypervisors which emulate that MSR as non-zero
                  * or even pass it through to the guest.
                  * A malicious hypervisor can still trick a guest into this
@@ -542,8 +542,8 @@ void __init sme_enable(struct boot_params *bp)
                         return;
  
                 /* For SME, check the SYSCFG MSR */
-               msr = __rdmsr(MSR_K8_SYSCFG);
-               if (!(msr & MSR_K8_SYSCFG_MEM_ENCRYPT))
+               msr = __rdmsr(MSR_AMD64_SYSCFG);
+               if (!(msr & MSR_AMD64_SYSCFG_MEM_ENCRYPT))
                         return;
         } else {
                 /* SEV state cannot be controlled by a command line option */
diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c

index ae744b6..dd40d3f 100644 (file)
--- a/arch/x86/pci/amd_bus.c
+++ b/arch/x86/pci/amd_bus.c
@@ -284,7 +284,7 @@ static int __init early_root_info_init(void)
  
         /* need to take out [4G, TOM2) for RAM*/
         /* SYS_CFG */
-       address = MSR_K8_SYSCFG;
+       address = MSR_AMD64_SYSCFG;
         rdmsrl(address, val);
         /* TOP_MEM2 is enabled? */
         if (val & (1<<21)) {
diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c

index df7b547..7515e78 100644 (file)
--- a/arch/x86/platform/efi/efi_64.c
+++ b/arch/x86/platform/efi/efi_64.c
@@ -47,7 +47,7 @@
  #include <asm/realmode.h>
  #include <asm/time.h>
  #include <asm/pgalloc.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
  
  /*
   * We allocate runtime services regions top-down, starting from -4G, i.e.
diff --git a/arch/x86/realmode/init.c b/arch/x86/realmode/init.c

index 1be71ef..2e1c1be 100644 (file)
--- a/arch/x86/realmode/init.c
+++ b/arch/x86/realmode/init.c
@@ -9,7 +9,7 @@
  #include <asm/realmode.h>
  #include <asm/tlbflush.h>
  #include <asm/crash.h>
-#include <asm/sev-es.h>
+#include <asm/sev.h>
  
  struct real_mode_header *real_mode_header;
  u32 *trampoline_cr4_features;
diff --git a/arch/x86/realmode/rm/trampoline_64.S b/arch/x86/realmode/rm/trampoline_64.S

index 84c5d1b..cc8391f 100644 (file)
--- a/arch/x86/realmode/rm/trampoline_64.S
+++ b/arch/x86/realmode/rm/trampoline_64.S
@@ -123,9 +123,9 @@ SYM_CODE_START(startup_32)
          */
         btl     $TH_FLAGS_SME_ACTIVE_BIT, pa_tr_flags
         jnc     .Ldone
-       movl    $MSR_K8_SYSCFG, %ecx
+       movl    $MSR_AMD64_SYSCFG, %ecx
         rdmsr
-       bts     $MSR_K8_SYSCFG_MEM_ENCRYPT_BIT, %eax
+       bts     $MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT, %eax
         jc      .Ldone
  
         /*
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c

index 0270cd7..acd1f88 100644 (file)
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -372,9 +372,38 @@ struct bfq_queue *bic_to_bfqq(struct bfq_io_cq *bic, bool is_sync)
         return bic->bfqq[is_sync];
  }
  
+static void bfq_put_stable_ref(struct bfq_queue *bfqq);
+
  void bic_set_bfqq(struct bfq_io_cq *bic, struct bfq_queue *bfqq, bool is_sync)
  {
+       /*
+        * If bfqq != NULL, then a non-stable queue merge between
+        * bic->bfqq and bfqq is happening here. This causes troubles
+        * in the following case: bic->bfqq has also been scheduled
+        * for a possible stable merge with bic->stable_merge_bfqq,
+        * and bic->stable_merge_bfqq == bfqq happens to
+        * hold. Troubles occur because bfqq may then undergo a split,
+        * thereby becoming eligible for a stable merge. Yet, if
+        * bic->stable_merge_bfqq points exactly to bfqq, then bfqq
+        * would be stably merged with itself. To avoid this anomaly,
+        * we cancel the stable merge if
+        * bic->stable_merge_bfqq == bfqq.
+        */
         bic->bfqq[is_sync] = bfqq;
+
+       if (bfqq && bic->stable_merge_bfqq == bfqq) {
+               /*
+                * Actually, these same instructions are executed also
+                * in bfq_setup_cooperator, in case of abort or actual
+                * execution of a stable merge. We could avoid
+                * repeating these instructions there too, but if we
+                * did so, we would nest even more complexity in this
+                * function.
+                */
+               bfq_put_stable_ref(bic->stable_merge_bfqq);
+
+               bic->stable_merge_bfqq = NULL;
+       }
  }
  
  struct bfq_data *bic_to_bfqd(struct bfq_io_cq *bic)
@@ -2263,10 +2292,9 @@ static void bfq_remove_request(struct request_queue *q,
  
  }
  
-static bool bfq_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool bfq_bio_merge(struct request_queue *q, struct bio *bio,
                 unsigned int nr_segs)
  {
-       struct request_queue *q = hctx->queue;
         struct bfq_data *bfqd = q->elevator->elevator_data;
         struct request *free = NULL;
         /*
@@ -2631,8 +2659,6 @@ static bool bfq_may_be_close_cooperator(struct bfq_queue *bfqq,
  static bool idling_boosts_thr_without_issues(struct bfq_data *bfqd,
                                              struct bfq_queue *bfqq);
  
-static void bfq_put_stable_ref(struct bfq_queue *bfqq);
-
  /*
   * Attempt to schedule a merge of bfqq with the currently in-service
   * queue or with a close queue among the scheduled queues.  Return
diff --git a/block/blk-iocost.c b/block/blk-iocost.c

index e0c4baa..c2d6bc8 100644 (file)
--- a/block/blk-iocost.c
+++ b/block/blk-iocost.c
@@ -1069,7 +1069,17 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
  
         lockdep_assert_held(&ioc->lock);
  
-       inuse = clamp_t(u32, inuse, 1, active);
+       /*
+        * For an active leaf node, its inuse shouldn't be zero or exceed
+        * @active. An active internal node's inuse is solely determined by the
+        * inuse to active ratio of its children regardless of @inuse.
+        */
+       if (list_empty(&iocg->active_list) && iocg->child_active_sum) {
+               inuse = DIV64_U64_ROUND_UP(active * iocg->child_inuse_sum,
+                                          iocg->child_active_sum);
+       } else {
+               inuse = clamp_t(u32, inuse, 1, active);
+       }
  
         iocg->last_inuse = iocg->inuse;
         if (save)
@@ -1086,7 +1096,7 @@ static void __propagate_weights(struct ioc_gq *iocg, u32 active, u32 inuse,
                 /* update the level sums */
                 parent->child_active_sum += (s32)(active - child->active);
                 parent->child_inuse_sum += (s32)(inuse - child->inuse);
-               /* apply the udpates */
+               /* apply the updates */
                 child->active = active;
                 child->inuse = inuse;
  
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c

index 42a365b..996a4b2 100644 (file)
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -358,14 +358,16 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
                 unsigned int nr_segs)
  {
         struct elevator_queue *e = q->elevator;
-       struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
-       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
+       struct blk_mq_ctx *ctx;
+       struct blk_mq_hw_ctx *hctx;
         bool ret = false;
         enum hctx_type type;
  
         if (e && e->type->ops.bio_merge)
-               return e->type->ops.bio_merge(hctx, bio, nr_segs);
+               return e->type->ops.bio_merge(q, bio, nr_segs);
  
+       ctx = blk_mq_get_ctx(q);
+       hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
         type = hctx->type;
         if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
             list_empty_careful(&ctx->rq_lists[type]))
diff --git a/block/blk-mq.c b/block/blk-mq.c

index 466676b..c86c01b 100644 (file)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2232,8 +2232,9 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
                 /* Bypass scheduler for flush requests */
                 blk_insert_flush(rq);
                 blk_mq_run_hw_queue(data.hctx, true);
-       } else if (plug && (q->nr_hw_queues == 1 || q->mq_ops->commit_rqs ||
-                               !blk_queue_nonrot(q))) {
+       } else if (plug && (q->nr_hw_queues == 1 ||
+                  blk_mq_is_sbitmap_shared(rq->mq_hctx->flags) ||
+                  q->mq_ops->commit_rqs || !blk_queue_nonrot(q))) {
                 /*
                  * Use plugging if we have a ->commit_rqs() hook as well, as
                  * we know the driver uses bd->last in a smart fashion.
@@ -3285,10 +3286,12 @@ EXPORT_SYMBOL(blk_mq_init_allocated_queue);
  /* tags can _not_ be used after returning from blk_mq_exit_queue */
  void blk_mq_exit_queue(struct request_queue *q)
  {
-       struct blk_mq_tag_set   *set = q->tag_set;
+       struct blk_mq_tag_set *set = q->tag_set;
  
-       blk_mq_del_queue_tag_set(q);
+       /* Checks hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED. */
         blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
+       /* May clear BLK_MQ_F_TAG_QUEUE_SHARED in hctx->flags. */
+       blk_mq_del_queue_tag_set(q);
  }
  
  static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set)
diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c

index 8969e12..81e3279 100644 (file)
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -561,11 +561,12 @@ static void kyber_limit_depth(unsigned int op, struct blk_mq_alloc_data *data)
         }
  }
  
-static bool kyber_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool kyber_bio_merge(struct request_queue *q, struct bio *bio,
                 unsigned int nr_segs)
  {
+       struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
+       struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
         struct kyber_hctx_data *khd = hctx->sched_data;
-       struct blk_mq_ctx *ctx = blk_mq_get_ctx(hctx->queue);
         struct kyber_ctx_queue *kcq = &khd->kcqs[ctx->index_hw[hctx->type]];
         unsigned int sched_domain = kyber_sched_domain(bio->bi_opf);
         struct list_head *rq_list = &kcq->rq_list[sched_domain];
diff --git a/block/mq-deadline.c b/block/mq-deadline.c

index 04aded7..8eea2cb 100644 (file)
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -461,10 +461,9 @@ static int dd_request_merge(struct request_queue *q, struct request **rq,
         return ELEVATOR_NO_MERGE;
  }
  
-static bool dd_bio_merge(struct blk_mq_hw_ctx *hctx, struct bio *bio,
+static bool dd_bio_merge(struct request_queue *q, struct bio *bio,
                 unsigned int nr_segs)
  {
-       struct request_queue *q = hctx->queue;
         struct deadline_data *dd = q->elevator->elevator_data;
         struct request *free = NULL;
         bool ret;
diff --git a/block/partitions/efi.c b/block/partitions/efi.c

index b64bfdd..e271679 100644 (file)
--- a/block/partitions/efi.c
+++ b/block/partitions/efi.c
@@ -682,7 +682,7 @@ static void utf16_le_to_7bit(const __le16 *in, unsigned int size, u8 *out)
  }
  
  /**
- * efi_partition(struct parsed_partitions *state)
+ * efi_partition - scan for GPT partitions
   * @state: disk parsed partitions
   *
   * Description: called from check.c, if the disk contains GPT
diff --git a/drivers/acpi/device_pm.c b/drivers/acpi/device_pm.c

index 16c0fe8..d260bc1 100644 (file)
--- a/drivers/acpi/device_pm.c
+++ b/drivers/acpi/device_pm.c
@@ -1313,6 +1313,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
                 {"PNP0C0B", }, /* Generic ACPI fan */
                 {"INT3404", }, /* Fan */
                 {"INTC1044", }, /* Fan for Tiger Lake generation */
+               {"INTC1048", }, /* Fan for Alder Lake generation */
                 {}
         };
         struct acpi_device *adev = ACPI_COMPANION(dev);
diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h

index b852cff..f973bbe 100644 (file)
--- a/drivers/acpi/internal.h
+++ b/drivers/acpi/internal.h
@@ -142,6 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
  int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
  int acpi_power_on_resources(struct acpi_device *device, int state);
  int acpi_power_transition(struct acpi_device *device, int state);
+void acpi_turn_off_unused_power_resources(void);
  
  /* --------------------------------------------------------------------------
                                Device Power Management
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c

index 958aaac..23d9a09 100644 (file)
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -686,6 +686,13 @@ int nfit_spa_type(struct acpi_nfit_system_address *spa)
         return -1;
  }
  
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+       if (spa->flags & ACPI_NFIT_LOCATION_COOKIE_VALID)
+               return sizeof(*spa);
+       return sizeof(*spa) - 8;
+}
+
  static bool add_spa(struct acpi_nfit_desc *acpi_desc,
                 struct nfit_table_prev *prev,
                 struct acpi_nfit_system_address *spa)
@@ -693,22 +700,22 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
         struct device *dev = acpi_desc->dev;
         struct nfit_spa *nfit_spa;
  
-       if (spa->header.length != sizeof(*spa))
+       if (spa->header.length != sizeof_spa(spa))
                 return false;
  
         list_for_each_entry(nfit_spa, &prev->spas, list) {
-               if (memcmp(nfit_spa->spa, spa, sizeof(*spa)) == 0) {
+               if (memcmp(nfit_spa->spa, spa, sizeof_spa(spa)) == 0) {
                         list_move_tail(&nfit_spa->list, &acpi_desc->spas);
                         return true;
                 }
         }
  
-       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof(*spa),
+       nfit_spa = devm_kzalloc(dev, sizeof(*nfit_spa) + sizeof_spa(spa),
                         GFP_KERNEL);
         if (!nfit_spa)
                 return false;
         INIT_LIST_HEAD(&nfit_spa->list);
-       memcpy(nfit_spa->spa, spa, sizeof(*spa));
+       memcpy(nfit_spa->spa, spa, sizeof_spa(spa));
         list_add_tail(&nfit_spa->list, &acpi_desc->spas);
         dev_dbg(dev, "spa index: %d type: %s\n",
                         spa->range_index,
diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c

index 32974b5..56102ea 100644 (file)
--- a/drivers/acpi/power.c
+++ b/drivers/acpi/power.c
@@ -995,6 +995,7 @@ void acpi_resume_power_resources(void)
  
         mutex_unlock(&power_resource_list_lock);
  }
+#endif
  
  void acpi_turn_off_unused_power_resources(void)
  {
@@ -1015,4 +1016,3 @@ void acpi_turn_off_unused_power_resources(void)
  
         mutex_unlock(&power_resource_list_lock);
  }
-#endif
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c

index a22778e..453eff8 100644 (file)
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device,
  
                 result = acpi_device_set_name(device, acpi_device_bus_id);
                 if (result) {
+                       kfree_const(acpi_device_bus_id->bus_id);
                         kfree(acpi_device_bus_id);
                         goto err_unlock;
                 }
@@ -2359,6 +2360,8 @@ int __init acpi_scan_init(void)
                 }
         }
  
+       acpi_turn_off_unused_power_resources();
+
         acpi_scan_initialized = true;
  
   out:
diff --git a/drivers/acpi/sleep.h b/drivers/acpi/sleep.h

index 1856f76..7fe41ee 100644 (file)
--- a/drivers/acpi/sleep.h
+++ b/drivers/acpi/sleep.h
@@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list;
  extern struct mutex acpi_device_lock;
  
  extern void acpi_resume_power_resources(void);
-extern void acpi_turn_off_unused_power_resources(void);
  
  static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
  {
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c

index 1fc1a99..b570848 100644 (file)
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev)
         dev->power.request_pending = false;
         dev->power.request = RPM_REQ_NONE;
         dev->power.deferred_resume = false;
+       dev->power.needs_force_resume = 0;
         INIT_WORK(&dev->power.work, pm_runtime_work);
  
         dev->power.timer_expires = 0;
@@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev)
          * its parent, but set its status to RPM_SUSPENDED anyway in case this
          * function will be called again for it in the meantime.
          */
-       if (pm_runtime_need_not_resume(dev))
+       if (pm_runtime_need_not_resume(dev)) {
                 pm_runtime_set_suspended(dev);
-       else
+       } else {
                 __update_runtime_status(dev, RPM_SUSPENDED);
+               dev->power.needs_force_resume = 1;
+       }
  
         return 0;
  
@@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev)
         int (*callback)(struct device *);
         int ret = 0;
  
-       if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
+       if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
                 goto out;
  
         /*
@@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev)
  
         pm_runtime_mark_last_busy(dev);
  out:
+       dev->power.needs_force_resume = 0;
         pm_runtime_enable(dev);
         return ret;
  }
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c

index 4ff71b5..45d2c28 100644 (file)
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -1980,7 +1980,8 @@ static void nbd_disconnect_and_put(struct nbd_device *nbd)
          * config ref and try to destroy the workqueue from inside the work
          * queue.
          */
-       flush_workqueue(nbd->recv_workq);
+       if (nbd->recv_workq)
+               flush_workqueue(nbd->recv_workq);
         if (test_and_clear_bit(NBD_RT_HAS_CONFIG_REF,
                                &nbd->config->runtime_flags))
                 nbd_config_put(nbd);
@@ -2014,12 +2015,11 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
                 return -EINVAL;
         }
         mutex_unlock(&nbd_index_mutex);
-       if (!refcount_inc_not_zero(&nbd->config_refs)) {
-               nbd_put(nbd);
-               return 0;
-       }
+       if (!refcount_inc_not_zero(&nbd->config_refs))
+               goto put_nbd;
         nbd_disconnect_and_put(nbd);
         nbd_config_put(nbd);
+put_nbd:
         nbd_put(nbd);
         return 0;
  }
diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c

index eff1f12..c84d239 100644 (file)
--- a/drivers/char/tpm/tpm2-cmd.c
+++ b/drivers/char/tpm/tpm2-cmd.c
@@ -656,6 +656,7 @@ int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
  
         if (nr_commands !=
             be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) {
+               rc = -EFAULT;
                 tpm_buf_destroy(&buf);
                 goto out;
         }
diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c

index a2e0395..55b9d39 100644 (file)
--- a/drivers/char/tpm/tpm_tis_core.c
+++ b/drivers/char/tpm/tpm_tis_core.c
@@ -709,16 +709,14 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
         cap_t cap;
         int ret;
  
-       /* TPM 2.0 */
-       if (chip->flags & TPM_CHIP_FLAG_TPM2)
-               return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-
-       /* TPM 1.2 */
         ret = request_locality(chip, 0);
         if (ret < 0)
                 return ret;
  
-       ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+       if (chip->flags & TPM_CHIP_FLAG_TPM2)
+               ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+       else
+               ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
  
         release_locality(chip, 0);
  
@@ -1127,12 +1125,20 @@ int tpm_tis_resume(struct device *dev)
         if (ret)
                 return ret;
  
-       /* TPM 1.2 requires self-test on resume. This function actually returns
+       /*
+        * TPM 1.2 requires self-test on resume. This function actually returns
          * an error code but for unknown reason it isn't handled.
          */
-       if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
+       if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+               ret = request_locality(chip, 0);
+               if (ret < 0)
+                       return ret;
+
                 tpm1_do_selftest(chip);
  
+               release_locality(chip, 0);
+       }
+
         return 0;
  }
  EXPORT_SYMBOL_GPL(tpm_tis_resume);
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c

index 977fd05..d6ece7b 100644 (file)
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -419,7 +419,7 @@ static void resume_hv_clock_tsc(struct clocksource *arg)
         hv_set_register(HV_REGISTER_REFERENCE_TSC, tsc_msr);
  }
  
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
  static int hv_cs_enable(struct clocksource *cs)
  {
         vclocks_set_used(VDSO_CLOCKMODE_HVCLOCK);
@@ -435,7 +435,7 @@ static struct clocksource hyperv_cs_tsc = {
         .flags  = CLOCK_SOURCE_IS_CONTINUOUS,
         .suspend= suspend_hv_clock_tsc,
         .resume = resume_hv_clock_tsc,
-#ifdef VDSO_CLOCKMODE_HVCLOCK
+#ifdef HAVE_VDSO_CLOCKMODE_HVCLOCK
         .enable = hv_cs_enable,
         .vdso_clock_mode = VDSO_CLOCKMODE_HVCLOCK,
  #else
diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c

index d1bbc16..7e74504 100644 (file)
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -646,7 +646,11 @@ static u64 get_max_boost_ratio(unsigned int cpu)
                 return 0;
         }
  
-       highest_perf = perf_caps.highest_perf;
+       if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
+               highest_perf = amd_get_highest_perf();
+       else
+               highest_perf = perf_caps.highest_perf;
+
         nominal_perf = perf_caps.nominal_perf;
  
         if (!highest_perf || !nominal_perf) {
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c

index f040106..0e69dff 100644 (file)
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
         {}
  };
  
+static bool intel_pstate_hwp_is_enabled(void)
+{
+       u64 value;
+
+       rdmsrl(MSR_PM_ENABLE, value);
+       return !!(value & 0x1);
+}
+
  static int __init intel_pstate_init(void)
  {
         const struct x86_cpu_id *id;
@@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void)
                  * Avoid enabling HWP for processors without EPP support,
                  * because that means incomplete HWP implementation which is a
                  * corner case and supporting it is generally problematic.
+                *
+                * If HWP is enabled already, though, there is no choice but to
+                * deal with it.
                  */
-               if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+               if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) ||
+                   intel_pstate_hwp_is_enabled()) {
                         hwp_active++;
                         hwp_mode_bdw = id->driver_data;
                         intel_pstate.attr = hwp_cpufreq_attrs;
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c

index 9fa4dfc..f0d8f60 100644 (file)
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -3083,7 +3083,7 @@ static void read_mc_regs(struct amd64_pvt *pvt)
         edac_dbg(0, "  TOP_MEM:  0x%016llx\n", pvt->top_mem);
  
         /* Check first whether TOP_MEM2 is enabled: */
-       rdmsrl(MSR_K8_SYSCFG, msr_val);
+       rdmsrl(MSR_AMD64_SYSCFG, msr_val);
         if (msr_val & BIT(21)) {
                 rdmsrl(MSR_K8_TOP_MEM2, pvt->top_mem2);
                 edac_dbg(0, "  TOP_MEM2: 0x%016llx\n", pvt->top_mem2);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h

index dc3a692..264176a 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1006,6 +1006,7 @@ struct amdgpu_device {
         struct amdgpu_df                df;
  
         struct amdgpu_ip_block          ip_blocks[AMDGPU_MAX_IP_NUM];
+       uint32_t                        harvest_ip_mask;
         int                             num_ip_blocks;
         struct mutex    mn_lock;
         DECLARE_HASHTABLE(mn_hash, 7);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

index 7d3b546..8b2a37b 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1683,6 +1683,19 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
         if (!ip_block_version)
                 return -EINVAL;
  
+       switch (ip_block_version->type) {
+       case AMD_IP_BLOCK_TYPE_VCN:
+               if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+                       return 0;
+               break;
+       case AMD_IP_BLOCK_TYPE_JPEG:
+               if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
+                       return 0;
+               break;
+       default:
+               break;
+       }
+
         DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks,
                   ip_block_version->funcs->name);
  
@@ -3111,7 +3124,6 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
         return amdgpu_device_asic_has_dc_support(adev->asic_type);
  }
  
-
  static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
  {
         struct amdgpu_device *adev =
@@ -3276,6 +3288,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
         adev->vm_manager.vm_pte_funcs = NULL;
         adev->vm_manager.vm_pte_num_scheds = 0;
         adev->gmc.gmc_funcs = NULL;
+       adev->harvest_ip_mask = 0x0;
         adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
         bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
  
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c

index b2dbcb4..e1b6f58 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -373,6 +373,34 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
         return -EINVAL;
  }
  
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
+{
+       struct binary_header *bhdr;
+       struct harvest_table *harvest_info;
+       int i;
+
+       bhdr = (struct binary_header *)adev->mman.discovery_bin;
+       harvest_info = (struct harvest_table *)(adev->mman.discovery_bin +
+                       le16_to_cpu(bhdr->table_list[HARVEST_INFO].offset));
+
+       for (i = 0; i < 32; i++) {
+               if (le32_to_cpu(harvest_info->list[i].hw_id) == 0)
+                       break;
+
+               switch (le32_to_cpu(harvest_info->list[i].hw_id)) {
+               case VCN_HWID:
+                       adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+                       adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+                       break;
+               case DMU_HWID:
+                       adev->harvest_ip_mask |= AMD_HARVEST_IP_DMU_MASK;
+                       break;
+               default:
+                       break;
+               }
+       }
+}
+
  int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
  {
         struct binary_header *bhdr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h

index 8f61838..1b1ae21 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
@@ -29,6 +29,7 @@
  
  void amdgpu_discovery_fini(struct amdgpu_device *adev);
  int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
+void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
  int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
                                      int *major, int *minor, int *revision);
  int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c

index d54af7f..d290ca0 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -623,6 +623,16 @@ static const struct amdgpu_ip_block_version nv_common_ip_block =
         .funcs = &nv_common_ip_funcs,
  };
  
+static bool nv_is_headless_sku(struct pci_dev *pdev)
+{
+       if ((pdev->device == 0x731E &&
+           (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
+           (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
+           (pdev->device == 0x7360 && pdev->revision == 0xC7))
+               return true;
+       return false;
+}
+
  static int nv_reg_base_init(struct amdgpu_device *adev)
  {
         int r;
@@ -635,6 +645,12 @@ static int nv_reg_base_init(struct amdgpu_device *adev)
                         goto legacy_init;
                 }
  
+               amdgpu_discovery_harvest_ip(adev);
+               if (nv_is_headless_sku(adev->pdev)) {
+                       adev->harvest_ip_mask |= AMD_HARVEST_IP_VCN_MASK;
+                       adev->harvest_ip_mask |= AMD_HARVEST_IP_JPEG_MASK;
+               }
+
                 return 0;
         }
  
@@ -671,16 +687,6 @@ void nv_set_virt_ops(struct amdgpu_device *adev)
         adev->virt.ops = &xgpu_nv_virt_ops;
  }
  
-static bool nv_is_headless_sku(struct pci_dev *pdev)
-{
-       if ((pdev->device == 0x731E &&
-           (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
-           (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
-           (pdev->device == 0x7360 && pdev->revision == 0xC7))
-               return true;
-       return false;
-}
-
  int nv_set_ip_blocks(struct amdgpu_device *adev)
  {
         int r;
@@ -728,8 +734,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
                     !amdgpu_sriov_vf(adev))
                         amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-               if (!nv_is_headless_sku(adev->pdev))
-                       amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+               amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
                 amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
                 if (adev->enable_mes)
                         amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
@@ -752,8 +757,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
                 if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
                     !amdgpu_sriov_vf(adev))
                         amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-               if (!nv_is_headless_sku(adev->pdev))
-                       amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+               amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
                 if (!amdgpu_sriov_vf(adev))
                         amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
                 break;
@@ -777,7 +781,6 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
                 amdgpu_device_ip_block_add(adev, &vcn_v3_0_ip_block);
                 if (!amdgpu_sriov_vf(adev))
                         amdgpu_device_ip_block_add(adev, &jpeg_v3_0_ip_block);
-
                 if (adev->enable_mes)
                         amdgpu_device_ip_block_add(adev, &mes_v10_1_ip_block);
                 break;
@@ -1149,6 +1152,11 @@ static int nv_common_early_init(void *handle)
                 return -EINVAL;
         }
  
+       if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
+               adev->pg_flags &= ~(AMD_PG_SUPPORT_VCN |
+                                   AMD_PG_SUPPORT_VCN_DPG |
+                                   AMD_PG_SUPPORT_JPEG);
+
         if (amdgpu_sriov_vf(adev)) {
                 amdgpu_virt_init_setting(adev);
                 xgpu_nv_mailbox_set_irq_funcs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c

index d80e12b..8e1b9a4 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -1401,7 +1401,8 @@ static int soc15_common_early_init(void *handle)
                                 AMD_CG_SUPPORT_MC_MGCG |
                                 AMD_CG_SUPPORT_MC_LS |
                                 AMD_CG_SUPPORT_SDMA_MGCG |
-                               AMD_CG_SUPPORT_SDMA_LS;
+                               AMD_CG_SUPPORT_SDMA_LS |
+                               AMD_CG_SUPPORT_VCN_MGCG;
  
                         adev->pg_flags = AMD_PG_SUPPORT_SDMA |
                                 AMD_PG_SUPPORT_MMHUB |
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c

index 51a773a..0c1beef 100644 (file)
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -1119,10 +1119,10 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
                 UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
         SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp);
  
-       /* put VCPU into reset */
-       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
-               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
-               ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+       /* stall UMC channel */
+       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2),
+               UVD_LMI_CTRL2__STALL_ARB_UMC_MASK,
+               ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
  
         tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK |
                 UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
@@ -1141,6 +1141,11 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
                 UVD_SOFT_RESET__LMI_SOFT_RESET_MASK,
                 ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
  
+       /* put VCPU into reset */
+       WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET),
+               UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
+               ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
         WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0);
  
         vcn_v1_0_enable_clock_gating(adev);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c

index 616f5b1..666796a 100644 (file)
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -650,6 +650,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
  
         /* File created at /sys/class/drm/card0/device/hdcp_srm*/
         hdcp_work[0].attr = data_attr;
+       sysfs_bin_attr_init(&hdcp_work[0].attr);
  
         if (sysfs_create_bin_file(&adev->dev->kobj, &hdcp_work[0].attr))
                 DRM_WARN("Failed to create device file hdcp_srm");
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h

index 43ed629..9ab706c 100644 (file)
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -216,6 +216,12 @@ enum PP_FEATURE_MASK {
         PP_GFX_DCS_MASK = 0x80000,
  };
  
+enum amd_harvest_ip_mask {
+    AMD_HARVEST_IP_VCN_MASK = 0x1,
+    AMD_HARVEST_IP_JPEG_MASK = 0x2,
+    AMD_HARVEST_IP_DMU_MASK = 0x4,
+};
+
  enum DC_FEATURE_MASK {
         DC_FBC_MASK = 0x1,
         DC_MULTI_MON_PP_MCLK_SWITCH_MASK = 0x2,
diff --git a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c

index 26a5321..15c0b8a 100644 (file)
--- a/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/si_dpm.c
@@ -4817,70 +4817,70 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
         u32 reg;
         int ret;
  
-       table->initialState.levels[0].mclk.vDLL_CNTL =
+       table->initialState.level.mclk.vDLL_CNTL =
                 cpu_to_be32(si_pi->clock_registers.dll_cntl);
-       table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+       table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-       table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-       table->initialState.levels[0].mclk.vMPLL_SS =
+       table->initialState.level.mclk.vMPLL_SS =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-       table->initialState.levels[0].mclk.vMPLL_SS2 =
+       table->initialState.level.mclk.vMPLL_SS2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss2);
  
-       table->initialState.levels[0].mclk.mclk_value =
+       table->initialState.level.mclk.mclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].mclk);
  
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
  
-       table->initialState.levels[0].sclk.sclk_value =
+       table->initialState.level.sclk.sclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].sclk);
  
-       table->initialState.levels[0].arbRefreshState =
+       table->initialState.level.arbRefreshState =
                 SISLANDS_INITIAL_STATE_ARB_INDEX;
  
-       table->initialState.levels[0].ACIndex = 0;
+       table->initialState.level.ACIndex = 0;
  
         ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
                                         initial_state->performance_levels[0].vddc,
-                                       &table->initialState.levels[0].vddc);
+                                       &table->initialState.level.vddc);
  
         if (!ret) {
                 u16 std_vddc;
  
                 ret = si_get_std_voltage_value(adev,
-                                              &table->initialState.levels[0].vddc,
+                                              &table->initialState.level.vddc,
                                                &std_vddc);
                 if (!ret)
                         si_populate_std_voltage_value(adev, std_vddc,
-                                                     table->initialState.levels[0].vddc.index,
-                                                     &table->initialState.levels[0].std_vddc);
+                                                     table->initialState.level.vddc.index,
+                                                     &table->initialState.level.std_vddc);
         }
  
         if (eg_pi->vddci_control)
                 si_populate_voltage_value(adev,
                                           &eg_pi->vddci_voltage_table,
                                           initial_state->performance_levels[0].vddci,
-                                         &table->initialState.levels[0].vddci);
+                                         &table->initialState.level.vddci);
  
         if (si_pi->vddc_phase_shed_control)
                 si_populate_phase_shedding_value(adev,
@@ -4888,41 +4888,41 @@ static int si_populate_smc_initial_state(struct amdgpu_device *adev,
                                                  initial_state->performance_levels[0].vddc,
                                                  initial_state->performance_levels[0].sclk,
                                                  initial_state->performance_levels[0].mclk,
-                                                &table->initialState.levels[0].vddc);
+                                                &table->initialState.level.vddc);
  
-       si_populate_initial_mvdd_value(adev, &table->initialState.levels[0].mvdd);
+       si_populate_initial_mvdd_value(adev, &table->initialState.level.mvdd);
  
         reg = CG_R(0xffff) | CG_L(0);
-       table->initialState.levels[0].aT = cpu_to_be32(reg);
-       table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
-       table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+       table->initialState.level.aT = cpu_to_be32(reg);
+       table->initialState.level.bSP = cpu_to_be32(pi->dsp);
+       table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
  
         if (adev->gmc.vram_type == AMDGPU_VRAM_TYPE_GDDR5) {
-               table->initialState.levels[0].strobeMode =
+               table->initialState.level.strobeMode =
                         si_get_strobe_mode_settings(adev,
                                                     initial_state->performance_levels[0].mclk);
  
                 if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-                       table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+                       table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
                 else
-                       table->initialState.levels[0].mcFlags =  0;
+                       table->initialState.level.mcFlags =  0;
         }
  
         table->initialState.levelCount = 1;
  
         table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
  
-       table->initialState.levels[0].dpm2.MaxPS = 0;
-       table->initialState.levels[0].dpm2.NearTDPDec = 0;
-       table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-       table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-       table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+       table->initialState.level.dpm2.MaxPS = 0;
+       table->initialState.level.dpm2.NearTDPDec = 0;
+       table->initialState.level.dpm2.AboveSafeInc = 0;
+       table->initialState.level.dpm2.BelowSafeInc = 0;
+       table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
@@ -4953,18 +4953,18 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
  
         if (pi->acpi_vddc) {
                 ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-                                               pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+                                               pi->acpi_vddc, &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = si_get_std_voltage_value(adev,
-                                                      &table->ACPIState.levels[0].vddc, &std_vddc);
+                                                      &table->ACPIState.level.vddc, &std_vddc);
                         if (!ret)
                                 si_populate_std_voltage_value(adev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
-               table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+               table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
  
                 if (si_pi->vddc_phase_shed_control) {
                         si_populate_phase_shedding_value(adev,
@@ -4972,23 +4972,23 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
                                                          pi->acpi_vddc,
                                                          0,
                                                          0,
-                                                        &table->ACPIState.levels[0].vddc);
+                                                        &table->ACPIState.level.vddc);
                 }
         } else {
                 ret = si_populate_voltage_value(adev, &eg_pi->vddc_voltage_table,
-                                               pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+                                               pi->min_vddc_in_table, &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = si_get_std_voltage_value(adev,
-                                                      &table->ACPIState.levels[0].vddc, &std_vddc);
+                                                      &table->ACPIState.level.vddc, &std_vddc);
  
                         if (!ret)
                                 si_populate_std_voltage_value(adev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
-               table->ACPIState.levels[0].gen2PCIE =
+               table->ACPIState.level.gen2PCIE =
                         (u8)amdgpu_get_pcie_gen_support(adev,
                                                         si_pi->sys_pcie_mask,
                                                         si_pi->boot_pcie_gen,
@@ -5000,14 +5000,14 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
                                                          pi->min_vddc_in_table,
                                                          0,
                                                          0,
-                                                        &table->ACPIState.levels[0].vddc);
+                                                        &table->ACPIState.level.vddc);
         }
  
         if (pi->acpi_vddc) {
                 if (eg_pi->acpi_vddci)
                         si_populate_voltage_value(adev, &eg_pi->vddci_voltage_table,
                                                   eg_pi->acpi_vddci,
-                                                 &table->ACPIState.levels[0].vddci);
+                                                 &table->ACPIState.level.vddci);
         }
  
         mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -5018,59 +5018,59 @@ static int si_populate_smc_acpi_state(struct amdgpu_device *adev,
         spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
         spll_func_cntl_2 |= SCLK_MUX_SEL(4);
  
-       table->ACPIState.levels[0].mclk.vDLL_CNTL =
+       table->ACPIState.level.mclk.vDLL_CNTL =
                 cpu_to_be32(dll_cntl);
-       table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+       table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
                 cpu_to_be32(mclk_pwrmgt_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
                 cpu_to_be32(mpll_ad_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
                 cpu_to_be32(mpll_dq_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
                 cpu_to_be32(mpll_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
                 cpu_to_be32(mpll_func_cntl_1);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
                 cpu_to_be32(mpll_func_cntl_2);
-       table->ACPIState.levels[0].mclk.vMPLL_SS =
+       table->ACPIState.level.mclk.vMPLL_SS =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-       table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+       table->ACPIState.level.mclk.vMPLL_SS2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss2);
  
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
                 cpu_to_be32(spll_func_cntl);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
                 cpu_to_be32(spll_func_cntl_2);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
                 cpu_to_be32(spll_func_cntl_3);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
                 cpu_to_be32(spll_func_cntl_4);
  
-       table->ACPIState.levels[0].mclk.mclk_value = 0;
-       table->ACPIState.levels[0].sclk.sclk_value = 0;
+       table->ACPIState.level.mclk.mclk_value = 0;
+       table->ACPIState.level.sclk.sclk_value = 0;
  
-       si_populate_mvdd_value(adev, 0, &table->ACPIState.levels[0].mvdd);
+       si_populate_mvdd_value(adev, 0, &table->ACPIState.level.mvdd);
  
         if (eg_pi->dynamic_ac_timing)
-               table->ACPIState.levels[0].ACIndex = 0;
+               table->ACPIState.level.ACIndex = 0;
  
-       table->ACPIState.levels[0].dpm2.MaxPS = 0;
-       table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-       table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-       table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-       table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+       table->ACPIState.level.dpm2.MaxPS = 0;
+       table->ACPIState.level.dpm2.NearTDPDec = 0;
+       table->ACPIState.level.dpm2.AboveSafeInc = 0;
+       table->ACPIState.level.dpm2.BelowSafeInc = 0;
+       table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
  
  static int si_populate_ulv_state(struct amdgpu_device *adev,
-                                SISLANDS_SMC_SWSTATE *state)
+                                struct SISLANDS_SMC_SWSTATE_SINGLE *state)
  {
         struct evergreen_power_info *eg_pi = evergreen_get_pi(adev);
         struct si_power_info *si_pi = si_get_pi(adev);
@@ -5079,19 +5079,19 @@ static int si_populate_ulv_state(struct amdgpu_device *adev,
         int ret;
  
         ret = si_convert_power_level_to_smc(adev, &ulv->pl,
-                                           &state->levels[0]);
+                                           &state->level);
         if (!ret) {
                 if (eg_pi->sclk_deep_sleep) {
                         if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-                               state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+                               state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
                         else
-                               state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+                               state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
                 }
                 if (ulv->one_pcie_lane_in_ulv)
                         state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-               state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-               state->levels[0].ACIndex = 1;
-               state->levels[0].std_vddc = state->levels[0].vddc;
+               state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+               state->level.ACIndex = 1;
+               state->level.std_vddc = state->level.vddc;
                 state->levelCount = 1;
  
                 state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -5190,7 +5190,9 @@ static int si_init_smc_table(struct amdgpu_device *adev)
         if (ret)
                 return ret;
  
-       table->driverState = table->initialState;
+       table->driverState.flags = table->initialState.flags;
+       table->driverState.levelCount = table->initialState.levelCount;
+       table->driverState.levels[0] = table->initialState.level;
  
         ret = si_do_program_memory_timing_parameters(adev, amdgpu_boot_state,
                                                      SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5737,8 +5739,8 @@ static int si_upload_ulv_state(struct amdgpu_device *adev)
         if (ulv->supported && ulv->pl.vddc) {
                 u32 address = si_pi->state_table_start +
                         offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-               SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-               u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+               struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+               u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
  
                 memset(smc_state, 0, state_size);
  
diff --git a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h

index 0f75540..c7dc117 100644 (file)
--- a/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
+++ b/drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
  
  typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
  
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+       uint8_t                             flags;
+       uint8_t                             levelCount;
+       uint8_t                             padding2;
+       uint8_t                             padding3;
+       SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
  #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
  #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
  #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
  
  struct SISLANDS_SMC_STATETABLE
  {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+       uint8_t                                 thermalProtectType;
+       uint8_t                                 systemFlags;
+       uint8_t                                 maxVDDCIndexInPPTable;
+       uint8_t                                 extraFlags;
+       uint32_t                                lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+       SISLANDS_SMC_VOLTAGEMASKTABLE           voltageMaskTable;
+       SISLANDS_SMC_VOLTAGEMASKTABLE           phaseMaskTable;
+       PP_SIslands_DPM2Parameters              dpm2Params;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      initialState;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      ACPIState;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      ULVState;
+       SISLANDS_SMC_SWSTATE                    driverState;
+       SISLANDS_SMC_HW_PERFORMANCE_LEVEL       dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
  };
  
  typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c

index 6a2dee8..642c60f 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -1095,44 +1095,6 @@ intel_dp_compute_link_config_wide(struct intel_dp *intel_dp,
         return -EINVAL;
  }
  
-/* Optimize link config in order: max bpp, min lanes, min clock */
-static int
-intel_dp_compute_link_config_fast(struct intel_dp *intel_dp,
-                                 struct intel_crtc_state *pipe_config,
-                                 const struct link_config_limits *limits)
-{
-       const struct drm_display_mode *adjusted_mode = &pipe_config->hw.adjusted_mode;
-       int bpp, clock, lane_count;
-       int mode_rate, link_clock, link_avail;
-
-       for (bpp = limits->max_bpp; bpp >= limits->min_bpp; bpp -= 2 * 3) {
-               int output_bpp = intel_dp_output_bpp(pipe_config->output_format, bpp);
-
-               mode_rate = intel_dp_link_required(adjusted_mode->crtc_clock,
-                                                  output_bpp);
-
-               for (lane_count = limits->min_lane_count;
-                    lane_count <= limits->max_lane_count;
-                    lane_count <<= 1) {
-                       for (clock = limits->min_clock; clock <= limits->max_clock; clock++) {
-                               link_clock = intel_dp->common_rates[clock];
-                               link_avail = intel_dp_max_data_rate(link_clock,
-                                                                   lane_count);
-
-                               if (mode_rate <= link_avail) {
-                                       pipe_config->lane_count = lane_count;
-                                       pipe_config->pipe_bpp = bpp;
-                                       pipe_config->port_clock = link_clock;
-
-                                       return 0;
-                               }
-                       }
-               }
-       }
-
-       return -EINVAL;
-}
-
  static int intel_dp_dsc_compute_bpp(struct intel_dp *intel_dp, u8 dsc_max_bpc)
  {
         int i, num_bpc;
@@ -1382,22 +1344,11 @@ intel_dp_compute_link_config(struct intel_encoder *encoder,
             intel_dp_can_bigjoiner(intel_dp))
                 pipe_config->bigjoiner = true;
  
-       if (intel_dp_is_edp(intel_dp))
-               /*
-                * Optimize for fast and narrow. eDP 1.3 section 3.3 and eDP 1.4
-                * section A.1: "It is recommended that the minimum number of
-                * lanes be used, using the minimum link rate allowed for that
-                * lane configuration."
-                *
-                * Note that we fall back to the max clock and lane count for eDP
-                * panels that fail with the fast optimal settings (see
-                * intel_dp->use_max_params), in which case the fast vs. wide
-                * choice doesn't matter.
-                */
-               ret = intel_dp_compute_link_config_fast(intel_dp, pipe_config, &limits);
-       else
-               /* Optimize for slow and wide. */
-               ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
+       /*
+        * Optimize for slow and wide for everything, because there are some
+        * eDP 1.3 and 1.4 panels that don't work well with fast and narrow.
+        */
+       ret = intel_dp_compute_link_config_wide(intel_dp, pipe_config, &limits);
  
         /* enable compression if the mode doesn't fit available BW */
         drm_dbg_kms(&i915->drm, "Force DSC en = %d\n", intel_dp->force_dsc_en);
@@ -2160,7 +2111,7 @@ void intel_dp_check_frl_training(struct intel_dp *intel_dp)
          * -PCON supports SRC_CTL_MODE (VESA DP2.0-HDMI2.1 PCON Spec Draft-1 Sec-7)
          * -sink is HDMI2.1
          */
-       if (!(intel_dp->dpcd[2] & DP_PCON_SOURCE_CTL_MODE) ||
+       if (!(intel_dp->downstream_ports[2] & DP_PCON_SOURCE_CTL_MODE) ||
             !intel_dp_is_hdmi_2_1_sink(intel_dp) ||
             intel_dp->frl.is_trained)
                 return;
diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c

index e5dadde..bbaf055 100644 (file)
--- a/drivers/gpu/drm/i915/display/intel_overlay.c
+++ b/drivers/gpu/drm/i915/display/intel_overlay.c
@@ -383,7 +383,7 @@ static void intel_overlay_off_tail(struct intel_overlay *overlay)
                 i830_overlay_clock_gating(dev_priv, true);
  }
  
-static void
+__i915_active_call static void
  intel_overlay_last_flip_retire(struct i915_active *active)
  {
         struct intel_overlay *overlay =
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c

index 23f6b00..f6fe5cb 100644 (file)
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -189,7 +189,7 @@ compute_partial_view(const struct drm_i915_gem_object *obj,
         struct i915_ggtt_view view;
  
         if (i915_gem_object_is_tiled(obj))
-               chunk = roundup(chunk, tile_row_pages(obj));
+               chunk = roundup(chunk, tile_row_pages(obj) ?: 1);
  
         view.type = I915_GGTT_VIEW_PARTIAL;
         view.partial.offset = rounddown(page_offset, chunk);
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c

index 176c196..74bf6fc 100644 (file)
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -641,7 +641,6 @@ static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
  
                 err = pin_pt_dma(vm, pde->pt.base);
                 if (err) {
-                       i915_gem_object_put(pde->pt.base);
                         free_pd(vm, pde);
                         return err;
                 }
diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c

index e72b7a0..8a32259 100644 (file)
--- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
+++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
@@ -653,8 +653,8 @@ static void detect_bit_6_swizzle(struct i915_ggtt *ggtt)
                  * banks of memory are paired and unswizzled on the
                  * uneven portion, so leave that as unknown.
                  */
-               if (intel_uncore_read(uncore, C0DRB3) ==
-                   intel_uncore_read(uncore, C1DRB3)) {
+               if (intel_uncore_read16(uncore, C0DRB3) ==
+                   intel_uncore_read16(uncore, C1DRB3)) {
                         swizzle_x = I915_BIT_6_SWIZZLE_9_10;
                         swizzle_y = I915_BIT_6_SWIZZLE_9;
                 }
diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c

index cf9a3d3..aa573b0 100644 (file)
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -1156,7 +1156,8 @@ static int auto_active(struct i915_active *ref)
         return 0;
  }
  
-static void auto_retire(struct i915_active *ref)
+__i915_active_call static void
+auto_retire(struct i915_active *ref)
  {
         i915_active_put(ref);
  }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c

index d553f62..b4d8e1b 100644 (file)
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -1153,10 +1153,6 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
  {
         struct device_node *phandle;
  
-       a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
-       if (IS_ERR(a6xx_gpu->llc_mmio))
-               return;
-
         /*
          * There is a different programming path for targets with an mmu500
          * attached, so detect if that is the case
@@ -1166,6 +1162,11 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
                 of_device_is_compatible(phandle, "arm,mmu-500"));
         of_node_put(phandle);
  
+       if (a6xx_gpu->have_mmu500)
+               a6xx_gpu->llc_mmio = NULL;
+       else
+               a6xx_gpu->llc_mmio = msm_ioremap(pdev, "cx_mem", "gpu_cx");
+
         a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
         a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
  
diff --git a/drivers/gpu/drm/msm/dp/dp_audio.c b/drivers/gpu/drm/msm/dp/dp_audio.c

index 82a8673..d7e4a39 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_audio.c
+++ b/drivers/gpu/drm/msm/dp/dp_audio.c
@@ -527,6 +527,7 @@ int dp_audio_hw_params(struct device *dev,
         dp_audio_setup_acr(audio);
         dp_audio_safe_to_exit_level(audio);
         dp_audio_enable(audio, true);
+       dp_display_signal_audio_start(dp_display);
         dp_display->audio_enabled = true;
  
  end:
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c

index 5a39da6..1784e11 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -178,6 +178,15 @@ static int dp_del_event(struct dp_display_private *dp_priv, u32 event)
         return 0;
  }
  
+void dp_display_signal_audio_start(struct msm_dp *dp_display)
+{
+       struct dp_display_private *dp;
+
+       dp = container_of(dp_display, struct dp_display_private, dp_display);
+
+       reinit_completion(&dp->audio_comp);
+}
+
  void dp_display_signal_audio_complete(struct msm_dp *dp_display)
  {
         struct dp_display_private *dp;
@@ -586,10 +595,8 @@ static int dp_connect_pending_timeout(struct dp_display_private *dp, u32 data)
         mutex_lock(&dp->event_mutex);
  
         state = dp->hpd_state;
-       if (state == ST_CONNECT_PENDING) {
-               dp_display_enable(dp, 0);
+       if (state == ST_CONNECT_PENDING)
                 dp->hpd_state = ST_CONNECTED;
-       }
  
         mutex_unlock(&dp->event_mutex);
  
@@ -651,7 +658,6 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
         dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND);
  
         /* signal the disconnect event early to ensure proper teardown */
-       reinit_completion(&dp->audio_comp);
         dp_display_handle_plugged_change(g_dp_display, false);
  
         dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK |
@@ -669,10 +675,8 @@ static int dp_disconnect_pending_timeout(struct dp_display_private *dp, u32 data
         mutex_lock(&dp->event_mutex);
  
         state =  dp->hpd_state;
-       if (state == ST_DISCONNECT_PENDING) {
-               dp_display_disable(dp, 0);
+       if (state == ST_DISCONNECT_PENDING)
                 dp->hpd_state = ST_DISCONNECTED;
-       }
  
         mutex_unlock(&dp->event_mutex);
  
@@ -898,7 +902,6 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data)
         /* wait only if audio was enabled */
         if (dp_display->audio_enabled) {
                 /* signal the disconnect event */
-               reinit_completion(&dp->audio_comp);
                 dp_display_handle_plugged_change(dp_display, false);
                 if (!wait_for_completion_timeout(&dp->audio_comp,
                                 HZ * 5))
@@ -1272,7 +1275,12 @@ static int dp_pm_resume(struct device *dev)
  
         status = dp_catalog_link_is_connected(dp->catalog);
  
-       if (status)
+       /*
+        * cannot declare the display as connected unless
+        * the HDMI cable is plugged in and the sink_count
+        * of the dongle becomes 1
+        */
+       if (status && dp->link->sink_count)
                 dp->dp_display.is_connected = true;
         else
                 dp->dp_display.is_connected = false;
diff --git a/drivers/gpu/drm/msm/dp/dp_display.h b/drivers/gpu/drm/msm/dp/dp_display.h

index 6092ba1..5173c89 100644 (file)
--- a/drivers/gpu/drm/msm/dp/dp_display.h
+++ b/drivers/gpu/drm/msm/dp/dp_display.h
@@ -34,6 +34,7 @@ int dp_display_get_modes(struct msm_dp *dp_display,
  int dp_display_request_irq(struct msm_dp *dp_display);
  bool dp_display_check_video_test(struct msm_dp *dp_display);
  int dp_display_get_test_bpp(struct msm_dp *dp_display);
+void dp_display_signal_audio_start(struct msm_dp *dp_display);
  void dp_display_signal_audio_complete(struct msm_dp *dp_display);
  
  #endif /* _DP_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c

index f0a2ddf..ff7f2ec 100644 (file)
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -843,7 +843,7 @@ int msm_dsi_phy_get_clk_provider(struct msm_dsi_phy *phy,
         if (pixel_clk_provider)
                 *pixel_clk_provider = phy->provided_clocks->hws[DSI_PIXEL_PLL_CLK]->clk;
  
-       return -EINVAL;
+       return 0;
  }
  
  void msm_dsi_phy_pll_save_state(struct msm_dsi_phy *phy)
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c

index 582b142..86e40a0 100644 (file)
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -405,6 +405,10 @@ static int pll_28nm_register(struct dsi_pll_28nm *pll_28nm, struct clk_hw **prov
         if (!vco_name)
                 return -ENOMEM;
  
+       parent_name = devm_kzalloc(dev, 32, GFP_KERNEL);
+       if (!parent_name)
+               return -ENOMEM;
+
         clk_name = devm_kzalloc(dev, 32, GFP_KERNEL);
         if (!clk_name)
                 return -ENOMEM;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c

index e1104d2..fe7d17c 100644 (file)
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -42,7 +42,7 @@
   * - 1.7.0 - Add MSM_PARAM_SUSPENDS to access suspend count
   */
  #define MSM_VERSION_MAJOR      1
-#define MSM_VERSION_MINOR      6
+#define MSM_VERSION_MINOR      7
  #define MSM_VERSION_PATCHLEVEL 0
  
  static const struct drm_mode_config_funcs mode_config_funcs = {
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c

index b199942..56df86e 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -190,13 +190,25 @@ struct page **msm_gem_get_pages(struct drm_gem_object *obj)
         }
  
         p = get_pages(obj);
+
+       if (!IS_ERR(p)) {
+               msm_obj->pin_count++;
+               update_inactive(msm_obj);
+       }
+
         msm_gem_unlock(obj);
         return p;
  }
  
  void msm_gem_put_pages(struct drm_gem_object *obj)
  {
-       /* when we start tracking the pin count, then do something here */
+       struct msm_gem_object *msm_obj = to_msm_bo(obj);
+
+       msm_gem_lock(obj);
+       msm_obj->pin_count--;
+       GEM_WARN_ON(msm_obj->pin_count < 0);
+       update_inactive(msm_obj);
+       msm_gem_unlock(obj);
  }
  
  int msm_gem_mmap_obj(struct drm_gem_object *obj,
@@ -646,6 +658,8 @@ static void *get_vaddr(struct drm_gem_object *obj, unsigned madv)
                         ret = -ENOMEM;
                         goto fail;
                 }
+
+               update_inactive(msm_obj);
         }
  
         return msm_obj->vaddr;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h

index a6480d2..03e2cc2 100644 (file)
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -221,7 +221,7 @@ static inline bool is_active(struct msm_gem_object *msm_obj)
  /* imported/exported objects are not purgeable: */
  static inline bool is_unpurgeable(struct msm_gem_object *msm_obj)
  {
-       return msm_obj->base.dma_buf && msm_obj->base.import_attach;
+       return msm_obj->base.import_attach || msm_obj->pin_count;
  }
  
  static inline bool is_purgeable(struct msm_gem_object *msm_obj)
@@ -271,7 +271,7 @@ static inline void mark_unpurgeable(struct msm_gem_object *msm_obj)
  
  static inline bool is_unevictable(struct msm_gem_object *msm_obj)
  {
-       return is_unpurgeable(msm_obj) || msm_obj->pin_count || msm_obj->vaddr;
+       return is_unpurgeable(msm_obj) || msm_obj->vaddr;
  }
  
  static inline void mark_evictable(struct msm_gem_object *msm_obj)
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c

index dd5ef64..769f666 100644 (file)
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -1687,102 +1687,102 @@ static int ni_populate_smc_initial_state(struct radeon_device *rdev,
         u32 reg;
         int ret;
  
-       table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
                 cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 =
+       table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL_2 =
                 cpu_to_be32(ni_pi->clock_registers.mpll_ad_func_cntl_2);
-       table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
                 cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 =
+       table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 =
                 cpu_to_be32(ni_pi->clock_registers.mpll_dq_func_cntl_2);
-       table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+       table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
                 cpu_to_be32(ni_pi->clock_registers.mclk_pwrmgt_cntl);
-       table->initialState.levels[0].mclk.vDLL_CNTL =
+       table->initialState.level.mclk.vDLL_CNTL =
                 cpu_to_be32(ni_pi->clock_registers.dll_cntl);
-       table->initialState.levels[0].mclk.vMPLL_SS =
+       table->initialState.level.mclk.vMPLL_SS =
                 cpu_to_be32(ni_pi->clock_registers.mpll_ss1);
-       table->initialState.levels[0].mclk.vMPLL_SS2 =
+       table->initialState.level.mclk.vMPLL_SS2 =
                 cpu_to_be32(ni_pi->clock_registers.mpll_ss2);
-       table->initialState.levels[0].mclk.mclk_value =
+       table->initialState.level.mclk.mclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].mclk);
  
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_2);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_3);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_func_cntl_4);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2 =
                 cpu_to_be32(ni_pi->clock_registers.cg_spll_spread_spectrum_2);
-       table->initialState.levels[0].sclk.sclk_value =
+       table->initialState.level.sclk.sclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].sclk);
-       table->initialState.levels[0].arbRefreshState =
+       table->initialState.level.arbRefreshState =
                 NISLANDS_INITIAL_STATE_ARB_INDEX;
  
-       table->initialState.levels[0].ACIndex = 0;
+       table->initialState.level.ACIndex = 0;
  
         ret = ni_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
                                         initial_state->performance_levels[0].vddc,
-                                       &table->initialState.levels[0].vddc);
+                                       &table->initialState.level.vddc);
         if (!ret) {
                 u16 std_vddc;
  
                 ret = ni_get_std_voltage_value(rdev,
-                                              &table->initialState.levels[0].vddc,
+                                              &table->initialState.level.vddc,
                                                &std_vddc);
                 if (!ret)
                         ni_populate_std_voltage_value(rdev, std_vddc,
-                                                     table->initialState.levels[0].vddc.index,
-                                                     &table->initialState.levels[0].std_vddc);
+                                                     table->initialState.level.vddc.index,
+                                                     &table->initialState.level.std_vddc);
         }
  
         if (eg_pi->vddci_control)
                 ni_populate_voltage_value(rdev,
                                           &eg_pi->vddci_voltage_table,
                                           initial_state->performance_levels[0].vddci,
-                                         &table->initialState.levels[0].vddci);
+                                         &table->initialState.level.vddci);
  
-       ni_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+       ni_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
  
         reg = CG_R(0xffff) | CG_L(0);
-       table->initialState.levels[0].aT = cpu_to_be32(reg);
+       table->initialState.level.aT = cpu_to_be32(reg);
  
-       table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+       table->initialState.level.bSP = cpu_to_be32(pi->dsp);
  
         if (pi->boot_in_gen2)
-               table->initialState.levels[0].gen2PCIE = 1;
+               table->initialState.level.gen2PCIE = 1;
         else
-               table->initialState.levels[0].gen2PCIE = 0;
+               table->initialState.level.gen2PCIE = 0;
  
         if (pi->mem_gddr5) {
-               table->initialState.levels[0].strobeMode =
+               table->initialState.level.strobeMode =
                         cypress_get_strobe_mode_settings(rdev,
                                                          initial_state->performance_levels[0].mclk);
  
                 if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-                       table->initialState.levels[0].mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
+                       table->initialState.level.mcFlags = NISLANDS_SMC_MC_EDC_RD_FLAG | NISLANDS_SMC_MC_EDC_WR_FLAG;
                 else
-                       table->initialState.levels[0].mcFlags =  0;
+                       table->initialState.level.mcFlags =  0;
         }
  
         table->initialState.levelCount = 1;
  
         table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
  
-       table->initialState.levels[0].dpm2.MaxPS = 0;
-       table->initialState.levels[0].dpm2.NearTDPDec = 0;
-       table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-       table->initialState.levels[0].dpm2.BelowSafeInc = 0;
+       table->initialState.level.dpm2.MaxPS = 0;
+       table->initialState.level.dpm2.NearTDPDec = 0;
+       table->initialState.level.dpm2.AboveSafeInc = 0;
+       table->initialState.level.dpm2.BelowSafeInc = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
@@ -1813,43 +1813,43 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
         if (pi->acpi_vddc) {
                 ret = ni_populate_voltage_value(rdev,
                                                 &eg_pi->vddc_voltage_table,
-                                               pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+                                               pi->acpi_vddc, &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = ni_get_std_voltage_value(rdev,
-                                                      &table->ACPIState.levels[0].vddc, &std_vddc);
+                                                      &table->ACPIState.level.vddc, &std_vddc);
                         if (!ret)
                                 ni_populate_std_voltage_value(rdev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
  
                 if (pi->pcie_gen2) {
                         if (pi->acpi_pcie_gen2)
-                               table->ACPIState.levels[0].gen2PCIE = 1;
+                               table->ACPIState.level.gen2PCIE = 1;
                         else
-                               table->ACPIState.levels[0].gen2PCIE = 0;
+                               table->ACPIState.level.gen2PCIE = 0;
                 } else {
-                       table->ACPIState.levels[0].gen2PCIE = 0;
+                       table->ACPIState.level.gen2PCIE = 0;
                 }
         } else {
                 ret = ni_populate_voltage_value(rdev,
                                                 &eg_pi->vddc_voltage_table,
                                                 pi->min_vddc_in_table,
-                                               &table->ACPIState.levels[0].vddc);
+                                               &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = ni_get_std_voltage_value(rdev,
-                                                      &table->ACPIState.levels[0].vddc,
+                                                      &table->ACPIState.level.vddc,
                                                        &std_vddc);
                         if (!ret)
                                 ni_populate_std_voltage_value(rdev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
-               table->ACPIState.levels[0].gen2PCIE = 0;
+               table->ACPIState.level.gen2PCIE = 0;
         }
  
         if (eg_pi->acpi_vddci) {
@@ -1857,7 +1857,7 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
                         ni_populate_voltage_value(rdev,
                                                   &eg_pi->vddci_voltage_table,
                                                   eg_pi->acpi_vddci,
-                                                 &table->ACPIState.levels[0].vddci);
+                                                 &table->ACPIState.level.vddci);
         }
  
  
@@ -1900,37 +1900,37 @@ static int ni_populate_smc_acpi_state(struct radeon_device *rdev,
         spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
         spll_func_cntl_2 |= SCLK_MUX_SEL(4);
  
-       table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
-       table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
-       table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
-       table->ACPIState.levels[0].mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
+       table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL = cpu_to_be32(mpll_ad_func_cntl);
+       table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL_2 = cpu_to_be32(mpll_ad_func_cntl_2);
+       table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL = cpu_to_be32(mpll_dq_func_cntl);
+       table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL_2 = cpu_to_be32(mpll_dq_func_cntl_2);
+       table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL = cpu_to_be32(mclk_pwrmgt_cntl);
+       table->ACPIState.level.mclk.vDLL_CNTL = cpu_to_be32(dll_cntl);
  
-       table->ACPIState.levels[0].mclk.mclk_value = 0;
+       table->ACPIState.level.mclk.mclk_value = 0;
  
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL = cpu_to_be32(spll_func_cntl);
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 = cpu_to_be32(spll_func_cntl_2);
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 = cpu_to_be32(spll_func_cntl_3);
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 = cpu_to_be32(spll_func_cntl_4);
  
-       table->ACPIState.levels[0].sclk.sclk_value = 0;
+       table->ACPIState.level.sclk.sclk_value = 0;
  
-       ni_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+       ni_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
  
         if (eg_pi->dynamic_ac_timing)
-               table->ACPIState.levels[0].ACIndex = 1;
+               table->ACPIState.level.ACIndex = 1;
  
-       table->ACPIState.levels[0].dpm2.MaxPS = 0;
-       table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-       table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-       table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
+       table->ACPIState.level.dpm2.MaxPS = 0;
+       table->ACPIState.level.dpm2.NearTDPDec = 0;
+       table->ACPIState.level.dpm2.AboveSafeInc = 0;
+       table->ACPIState.level.dpm2.BelowSafeInc = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
@@ -1980,7 +1980,9 @@ static int ni_init_smc_table(struct radeon_device *rdev)
         if (ret)
                 return ret;
  
-       table->driverState = table->initialState;
+       table->driverState.flags = table->initialState.flags;
+       table->driverState.levelCount = table->initialState.levelCount;
+       table->driverState.levels[0] = table->initialState.level;
  
         table->ULVState = table->initialState;
  
diff --git a/drivers/gpu/drm/radeon/nislands_smc.h b/drivers/gpu/drm/radeon/nislands_smc.h

index 7395cb6..42f3bab 100644 (file)
--- a/drivers/gpu/drm/radeon/nislands_smc.h
+++ b/drivers/gpu/drm/radeon/nislands_smc.h
@@ -143,6 +143,14 @@ struct NISLANDS_SMC_SWSTATE
  
  typedef struct NISLANDS_SMC_SWSTATE NISLANDS_SMC_SWSTATE;
  
+struct NISLANDS_SMC_SWSTATE_SINGLE {
+       uint8_t                             flags;
+       uint8_t                             levelCount;
+       uint8_t                             padding2;
+       uint8_t                             padding3;
+       NISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
  #define NISLANDS_SMC_VOLTAGEMASK_VDDC  0
  #define NISLANDS_SMC_VOLTAGEMASK_MVDD  1
  #define NISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -160,19 +168,19 @@ typedef struct NISLANDS_SMC_VOLTAGEMASKTABLE NISLANDS_SMC_VOLTAGEMASKTABLE;
  
  struct NISLANDS_SMC_STATETABLE
  {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
-    NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    PP_NIslands_DPM2Parameters          dpm2Params;
-    NISLANDS_SMC_SWSTATE                initialState;
-    NISLANDS_SMC_SWSTATE                ACPIState;
-    NISLANDS_SMC_SWSTATE                ULVState;
-    NISLANDS_SMC_SWSTATE                driverState;
-    NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+       uint8_t                             thermalProtectType;
+       uint8_t                             systemFlags;
+       uint8_t                             maxVDDCIndexInPPTable;
+       uint8_t                             extraFlags;
+       uint8_t                             highSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+       uint32_t                            lowSMIO[NISLANDS_MAX_NO_VREG_STEPS];
+       NISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
+       PP_NIslands_DPM2Parameters          dpm2Params;
+       struct NISLANDS_SMC_SWSTATE_SINGLE  initialState;
+       struct NISLANDS_SMC_SWSTATE_SINGLE  ACPIState;
+       struct NISLANDS_SMC_SWSTATE_SINGLE  ULVState;
+       NISLANDS_SMC_SWSTATE                driverState;
+       NISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[NISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
  };
  
  typedef struct NISLANDS_SMC_STATETABLE NISLANDS_SMC_STATETABLE;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h

index 42281fc..56ed563 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -1549,6 +1549,7 @@ struct radeon_dpm {
         void                    *priv;
         u32                     new_active_crtcs;
         int                     new_active_crtc_count;
+       int                     high_pixelclock_count;
         u32                     current_active_crtcs;
         int                     current_active_crtc_count;
         bool single_display;
diff --git a/drivers/gpu/drm/radeon/radeon_pm.c b/drivers/gpu/drm/radeon/radeon_pm.c

index 0c1950f..3861c0b 100644 (file)
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@@ -1767,6 +1767,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
         struct drm_device *ddev = rdev->ddev;
         struct drm_crtc *crtc;
         struct radeon_crtc *radeon_crtc;
+       struct radeon_connector *radeon_connector;
  
         if (!rdev->pm.dpm_enabled)
                 return;
@@ -1776,6 +1777,7 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
         /* update active crtc counts */
         rdev->pm.dpm.new_active_crtcs = 0;
         rdev->pm.dpm.new_active_crtc_count = 0;
+       rdev->pm.dpm.high_pixelclock_count = 0;
         if (rdev->num_crtc && rdev->mode_info.mode_config_initialized) {
                 list_for_each_entry(crtc,
                                     &ddev->mode_config.crtc_list, head) {
@@ -1783,6 +1785,12 @@ static void radeon_pm_compute_clocks_dpm(struct radeon_device *rdev)
                         if (crtc->enabled) {
                                 rdev->pm.dpm.new_active_crtcs |= (1 << radeon_crtc->crtc_id);
                                 rdev->pm.dpm.new_active_crtc_count++;
+                               if (!radeon_crtc->connector)
+                                       continue;
+
+                               radeon_connector = to_radeon_connector(radeon_crtc->connector);
+                               if (radeon_connector->pixelclock_for_modeset > 297000)
+                                       rdev->pm.dpm.high_pixelclock_count++;
                         }
                 }
         }
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c

index 9186095..3add39c 100644 (file)
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -2979,6 +2979,9 @@ static void si_apply_state_adjust_rules(struct radeon_device *rdev,
                     (rdev->pdev->device == 0x6605)) {
                         max_sclk = 75000;
                 }
+
+               if (rdev->pm.dpm.high_pixelclock_count > 1)
+                       disable_sclk_switching = true;
         }
  
         if (rps->vce_active) {
@@ -4350,70 +4353,70 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
         u32 reg;
         int ret;
  
-       table->initialState.levels[0].mclk.vDLL_CNTL =
+       table->initialState.level.mclk.vDLL_CNTL =
                 cpu_to_be32(si_pi->clock_registers.dll_cntl);
-       table->initialState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+       table->initialState.level.mclk.vMCLK_PWRMGT_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mclk_pwrmgt_cntl);
-       table->initialState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_AD_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_ad_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_DQ_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_dq_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL_1 =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_1);
-       table->initialState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+       table->initialState.level.mclk.vMPLL_FUNC_CNTL_2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_func_cntl_2);
-       table->initialState.levels[0].mclk.vMPLL_SS =
+       table->initialState.level.mclk.vMPLL_SS =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-       table->initialState.levels[0].mclk.vMPLL_SS2 =
+       table->initialState.level.mclk.vMPLL_SS2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss2);
  
-       table->initialState.levels[0].mclk.mclk_value =
+       table->initialState.level.mclk.mclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].mclk);
  
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_2);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_3);
-       table->initialState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+       table->initialState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_func_cntl_4);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum);
-       table->initialState.levels[0].sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
+       table->initialState.level.sclk.vCG_SPLL_SPREAD_SPECTRUM_2  =
                 cpu_to_be32(si_pi->clock_registers.cg_spll_spread_spectrum_2);
  
-       table->initialState.levels[0].sclk.sclk_value =
+       table->initialState.level.sclk.sclk_value =
                 cpu_to_be32(initial_state->performance_levels[0].sclk);
  
-       table->initialState.levels[0].arbRefreshState =
+       table->initialState.level.arbRefreshState =
                 SISLANDS_INITIAL_STATE_ARB_INDEX;
  
-       table->initialState.levels[0].ACIndex = 0;
+       table->initialState.level.ACIndex = 0;
  
         ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
                                         initial_state->performance_levels[0].vddc,
-                                       &table->initialState.levels[0].vddc);
+                                       &table->initialState.level.vddc);
  
         if (!ret) {
                 u16 std_vddc;
  
                 ret = si_get_std_voltage_value(rdev,
-                                              &table->initialState.levels[0].vddc,
+                                              &table->initialState.level.vddc,
                                                &std_vddc);
                 if (!ret)
                         si_populate_std_voltage_value(rdev, std_vddc,
-                                                     table->initialState.levels[0].vddc.index,
-                                                     &table->initialState.levels[0].std_vddc);
+                                                     table->initialState.level.vddc.index,
+                                                     &table->initialState.level.std_vddc);
         }
  
         if (eg_pi->vddci_control)
                 si_populate_voltage_value(rdev,
                                           &eg_pi->vddci_voltage_table,
                                           initial_state->performance_levels[0].vddci,
-                                         &table->initialState.levels[0].vddci);
+                                         &table->initialState.level.vddci);
  
         if (si_pi->vddc_phase_shed_control)
                 si_populate_phase_shedding_value(rdev,
@@ -4421,43 +4424,43 @@ static int si_populate_smc_initial_state(struct radeon_device *rdev,
                                                  initial_state->performance_levels[0].vddc,
                                                  initial_state->performance_levels[0].sclk,
                                                  initial_state->performance_levels[0].mclk,
-                                                &table->initialState.levels[0].vddc);
+                                                &table->initialState.level.vddc);
  
-       si_populate_initial_mvdd_value(rdev, &table->initialState.levels[0].mvdd);
+       si_populate_initial_mvdd_value(rdev, &table->initialState.level.mvdd);
  
         reg = CG_R(0xffff) | CG_L(0);
-       table->initialState.levels[0].aT = cpu_to_be32(reg);
+       table->initialState.level.aT = cpu_to_be32(reg);
  
-       table->initialState.levels[0].bSP = cpu_to_be32(pi->dsp);
+       table->initialState.level.bSP = cpu_to_be32(pi->dsp);
  
-       table->initialState.levels[0].gen2PCIE = (u8)si_pi->boot_pcie_gen;
+       table->initialState.level.gen2PCIE = (u8)si_pi->boot_pcie_gen;
  
         if (pi->mem_gddr5) {
-               table->initialState.levels[0].strobeMode =
+               table->initialState.level.strobeMode =
                         si_get_strobe_mode_settings(rdev,
                                                     initial_state->performance_levels[0].mclk);
  
                 if (initial_state->performance_levels[0].mclk > pi->mclk_edc_enable_threshold)
-                       table->initialState.levels[0].mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
+                       table->initialState.level.mcFlags = SISLANDS_SMC_MC_EDC_RD_FLAG | SISLANDS_SMC_MC_EDC_WR_FLAG;
                 else
-                       table->initialState.levels[0].mcFlags =  0;
+                       table->initialState.level.mcFlags =  0;
         }
  
         table->initialState.levelCount = 1;
  
         table->initialState.flags |= PPSMC_SWSTATE_FLAG_DC;
  
-       table->initialState.levels[0].dpm2.MaxPS = 0;
-       table->initialState.levels[0].dpm2.NearTDPDec = 0;
-       table->initialState.levels[0].dpm2.AboveSafeInc = 0;
-       table->initialState.levels[0].dpm2.BelowSafeInc = 0;
-       table->initialState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+       table->initialState.level.dpm2.MaxPS = 0;
+       table->initialState.level.dpm2.NearTDPDec = 0;
+       table->initialState.level.dpm2.AboveSafeInc = 0;
+       table->initialState.level.dpm2.BelowSafeInc = 0;
+       table->initialState.level.dpm2.PwrEfficiencyRatio = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->initialState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->initialState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->initialState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
@@ -4488,18 +4491,18 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
  
         if (pi->acpi_vddc) {
                 ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-                                               pi->acpi_vddc, &table->ACPIState.levels[0].vddc);
+                                               pi->acpi_vddc, &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = si_get_std_voltage_value(rdev,
-                                                      &table->ACPIState.levels[0].vddc, &std_vddc);
+                                                      &table->ACPIState.level.vddc, &std_vddc);
                         if (!ret)
                                 si_populate_std_voltage_value(rdev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
-               table->ACPIState.levels[0].gen2PCIE = si_pi->acpi_pcie_gen;
+               table->ACPIState.level.gen2PCIE = si_pi->acpi_pcie_gen;
  
                 if (si_pi->vddc_phase_shed_control) {
                         si_populate_phase_shedding_value(rdev,
@@ -4507,23 +4510,23 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
                                                          pi->acpi_vddc,
                                                          0,
                                                          0,
-                                                        &table->ACPIState.levels[0].vddc);
+                                                        &table->ACPIState.level.vddc);
                 }
         } else {
                 ret = si_populate_voltage_value(rdev, &eg_pi->vddc_voltage_table,
-                                               pi->min_vddc_in_table, &table->ACPIState.levels[0].vddc);
+                                               pi->min_vddc_in_table, &table->ACPIState.level.vddc);
                 if (!ret) {
                         u16 std_vddc;
  
                         ret = si_get_std_voltage_value(rdev,
-                                                      &table->ACPIState.levels[0].vddc, &std_vddc);
+                                                      &table->ACPIState.level.vddc, &std_vddc);
  
                         if (!ret)
                                 si_populate_std_voltage_value(rdev, std_vddc,
-                                                             table->ACPIState.levels[0].vddc.index,
-                                                             &table->ACPIState.levels[0].std_vddc);
+                                                             table->ACPIState.level.vddc.index,
+                                                             &table->ACPIState.level.std_vddc);
                 }
-               table->ACPIState.levels[0].gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
+               table->ACPIState.level.gen2PCIE = (u8)r600_get_pcie_gen_support(rdev,
                                                                                     si_pi->sys_pcie_mask,
                                                                                     si_pi->boot_pcie_gen,
                                                                                     RADEON_PCIE_GEN1);
@@ -4534,14 +4537,14 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
                                                          pi->min_vddc_in_table,
                                                          0,
                                                          0,
-                                                        &table->ACPIState.levels[0].vddc);
+                                                        &table->ACPIState.level.vddc);
         }
  
         if (pi->acpi_vddc) {
                 if (eg_pi->acpi_vddci)
                         si_populate_voltage_value(rdev, &eg_pi->vddci_voltage_table,
                                                   eg_pi->acpi_vddci,
-                                                 &table->ACPIState.levels[0].vddci);
+                                                 &table->ACPIState.level.vddci);
         }
  
         mclk_pwrmgt_cntl |= MRDCK0_RESET | MRDCK1_RESET;
@@ -4552,59 +4555,59 @@ static int si_populate_smc_acpi_state(struct radeon_device *rdev,
         spll_func_cntl_2 &= ~SCLK_MUX_SEL_MASK;
         spll_func_cntl_2 |= SCLK_MUX_SEL(4);
  
-       table->ACPIState.levels[0].mclk.vDLL_CNTL =
+       table->ACPIState.level.mclk.vDLL_CNTL =
                 cpu_to_be32(dll_cntl);
-       table->ACPIState.levels[0].mclk.vMCLK_PWRMGT_CNTL =
+       table->ACPIState.level.mclk.vMCLK_PWRMGT_CNTL =
                 cpu_to_be32(mclk_pwrmgt_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_AD_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_AD_FUNC_CNTL =
                 cpu_to_be32(mpll_ad_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_DQ_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_DQ_FUNC_CNTL =
                 cpu_to_be32(mpll_dq_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL =
                 cpu_to_be32(mpll_func_cntl);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_1 =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_1 =
                 cpu_to_be32(mpll_func_cntl_1);
-       table->ACPIState.levels[0].mclk.vMPLL_FUNC_CNTL_2 =
+       table->ACPIState.level.mclk.vMPLL_FUNC_CNTL_2 =
                 cpu_to_be32(mpll_func_cntl_2);
-       table->ACPIState.levels[0].mclk.vMPLL_SS =
+       table->ACPIState.level.mclk.vMPLL_SS =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss1);
-       table->ACPIState.levels[0].mclk.vMPLL_SS2 =
+       table->ACPIState.level.mclk.vMPLL_SS2 =
                 cpu_to_be32(si_pi->clock_registers.mpll_ss2);
  
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL =
                 cpu_to_be32(spll_func_cntl);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_2 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_2 =
                 cpu_to_be32(spll_func_cntl_2);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_3 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_3 =
                 cpu_to_be32(spll_func_cntl_3);
-       table->ACPIState.levels[0].sclk.vCG_SPLL_FUNC_CNTL_4 =
+       table->ACPIState.level.sclk.vCG_SPLL_FUNC_CNTL_4 =
                 cpu_to_be32(spll_func_cntl_4);
  
-       table->ACPIState.levels[0].mclk.mclk_value = 0;
-       table->ACPIState.levels[0].sclk.sclk_value = 0;
+       table->ACPIState.level.mclk.mclk_value = 0;
+       table->ACPIState.level.sclk.sclk_value = 0;
  
-       si_populate_mvdd_value(rdev, 0, &table->ACPIState.levels[0].mvdd);
+       si_populate_mvdd_value(rdev, 0, &table->ACPIState.level.mvdd);
  
         if (eg_pi->dynamic_ac_timing)
-               table->ACPIState.levels[0].ACIndex = 0;
+               table->ACPIState.level.ACIndex = 0;
  
-       table->ACPIState.levels[0].dpm2.MaxPS = 0;
-       table->ACPIState.levels[0].dpm2.NearTDPDec = 0;
-       table->ACPIState.levels[0].dpm2.AboveSafeInc = 0;
-       table->ACPIState.levels[0].dpm2.BelowSafeInc = 0;
-       table->ACPIState.levels[0].dpm2.PwrEfficiencyRatio = 0;
+       table->ACPIState.level.dpm2.MaxPS = 0;
+       table->ACPIState.level.dpm2.NearTDPDec = 0;
+       table->ACPIState.level.dpm2.AboveSafeInc = 0;
+       table->ACPIState.level.dpm2.BelowSafeInc = 0;
+       table->ACPIState.level.dpm2.PwrEfficiencyRatio = 0;
  
         reg = MIN_POWER_MASK | MAX_POWER_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle = cpu_to_be32(reg);
  
         reg = MAX_POWER_DELTA_MASK | STI_SIZE_MASK | LTI_RATIO_MASK;
-       table->ACPIState.levels[0].SQPowerThrottle_2 = cpu_to_be32(reg);
+       table->ACPIState.level.SQPowerThrottle_2 = cpu_to_be32(reg);
  
         return 0;
  }
  
  static int si_populate_ulv_state(struct radeon_device *rdev,
-                                SISLANDS_SMC_SWSTATE *state)
+                                struct SISLANDS_SMC_SWSTATE_SINGLE *state)
  {
         struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
         struct si_power_info *si_pi = si_get_pi(rdev);
@@ -4613,19 +4616,19 @@ static int si_populate_ulv_state(struct radeon_device *rdev,
         int ret;
  
         ret = si_convert_power_level_to_smc(rdev, &ulv->pl,
-                                           &state->levels[0]);
+                                           &state->level);
         if (!ret) {
                 if (eg_pi->sclk_deep_sleep) {
                         if (sclk_in_sr <= SCLK_MIN_DEEPSLEEP_FREQ)
-                               state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
+                               state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_BYPASS;
                         else
-                               state->levels[0].stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
+                               state->level.stateFlags |= PPSMC_STATEFLAG_DEEPSLEEP_THROTTLE;
                 }
                 if (ulv->one_pcie_lane_in_ulv)
                         state->flags |= PPSMC_SWSTATE_FLAG_PCIE_X1;
-               state->levels[0].arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
-               state->levels[0].ACIndex = 1;
-               state->levels[0].std_vddc = state->levels[0].vddc;
+               state->level.arbRefreshState = (u8)(SISLANDS_ULV_STATE_ARB_INDEX);
+               state->level.ACIndex = 1;
+               state->level.std_vddc = state->level.vddc;
                 state->levelCount = 1;
  
                 state->flags |= PPSMC_SWSTATE_FLAG_DC;
@@ -4725,7 +4728,9 @@ static int si_init_smc_table(struct radeon_device *rdev)
         if (ret)
                 return ret;
  
-       table->driverState = table->initialState;
+       table->driverState.flags = table->initialState.flags;
+       table->driverState.levelCount = table->initialState.levelCount;
+       table->driverState.levels[0] = table->initialState.level;
  
         ret = si_do_program_memory_timing_parameters(rdev, radeon_boot_state,
                                                      SISLANDS_INITIAL_STATE_ARB_INDEX);
@@ -5275,8 +5280,8 @@ static int si_upload_ulv_state(struct radeon_device *rdev)
         if (ulv->supported && ulv->pl.vddc) {
                 u32 address = si_pi->state_table_start +
                         offsetof(SISLANDS_SMC_STATETABLE, ULVState);
-               SISLANDS_SMC_SWSTATE *smc_state = &si_pi->smc_statetable.ULVState;
-               u32 state_size = sizeof(SISLANDS_SMC_SWSTATE);
+               struct SISLANDS_SMC_SWSTATE_SINGLE *smc_state = &si_pi->smc_statetable.ULVState;
+               u32 state_size = sizeof(struct SISLANDS_SMC_SWSTATE_SINGLE);
  
                 memset(smc_state, 0, state_size);
  
diff --git a/drivers/gpu/drm/radeon/sislands_smc.h b/drivers/gpu/drm/radeon/sislands_smc.h

index fbd6589..4ea1cb2 100644 (file)
--- a/drivers/gpu/drm/radeon/sislands_smc.h
+++ b/drivers/gpu/drm/radeon/sislands_smc.h
@@ -191,6 +191,14 @@ struct SISLANDS_SMC_SWSTATE
  
  typedef struct SISLANDS_SMC_SWSTATE SISLANDS_SMC_SWSTATE;
  
+struct SISLANDS_SMC_SWSTATE_SINGLE {
+       uint8_t                             flags;
+       uint8_t                             levelCount;
+       uint8_t                             padding2;
+       uint8_t                             padding3;
+       SISLANDS_SMC_HW_PERFORMANCE_LEVEL   level;
+};
+
  #define SISLANDS_SMC_VOLTAGEMASK_VDDC  0
  #define SISLANDS_SMC_VOLTAGEMASK_MVDD  1
  #define SISLANDS_SMC_VOLTAGEMASK_VDDCI 2
@@ -208,19 +216,19 @@ typedef struct SISLANDS_SMC_VOLTAGEMASKTABLE SISLANDS_SMC_VOLTAGEMASKTABLE;
  
  struct SISLANDS_SMC_STATETABLE
  {
-    uint8_t                             thermalProtectType;
-    uint8_t                             systemFlags;
-    uint8_t                             maxVDDCIndexInPPTable;
-    uint8_t                             extraFlags;
-    uint32_t                            lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
-    SISLANDS_SMC_VOLTAGEMASKTABLE       voltageMaskTable;
-    SISLANDS_SMC_VOLTAGEMASKTABLE       phaseMaskTable;
-    PP_SIslands_DPM2Parameters          dpm2Params;
-    SISLANDS_SMC_SWSTATE                initialState;
-    SISLANDS_SMC_SWSTATE                ACPIState;
-    SISLANDS_SMC_SWSTATE                ULVState;
-    SISLANDS_SMC_SWSTATE                driverState;
-    SISLANDS_SMC_HW_PERFORMANCE_LEVEL   dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE - 1];
+       uint8_t                                 thermalProtectType;
+       uint8_t                                 systemFlags;
+       uint8_t                                 maxVDDCIndexInPPTable;
+       uint8_t                                 extraFlags;
+       uint32_t                                lowSMIO[SISLANDS_MAX_NO_VREG_STEPS];
+       SISLANDS_SMC_VOLTAGEMASKTABLE           voltageMaskTable;
+       SISLANDS_SMC_VOLTAGEMASKTABLE           phaseMaskTable;
+       PP_SIslands_DPM2Parameters              dpm2Params;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      initialState;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      ACPIState;
+       struct SISLANDS_SMC_SWSTATE_SINGLE      ULVState;
+       SISLANDS_SMC_SWSTATE                    driverState;
+       SISLANDS_SMC_HW_PERFORMANCE_LEVEL       dpmLevels[SISLANDS_MAX_SMC_PERFORMANCE_LEVELS_PER_SWSTATE];
  };
  
  typedef struct SISLANDS_SMC_STATETABLE SISLANDS_SMC_STATETABLE;
diff --git a/drivers/gpu/drm/vc4/vc4_vec.c b/drivers/gpu/drm/vc4/vc4_vec.c

index bd5b8eb..090529d 100644 (file)
--- a/drivers/gpu/drm/vc4/vc4_vec.c
+++ b/drivers/gpu/drm/vc4/vc4_vec.c
@@ -197,12 +197,6 @@ struct vc4_vec_connector {
         struct drm_encoder *encoder;
  };
  
-static inline struct vc4_vec_connector *
-to_vc4_vec_connector(struct drm_connector *connector)
-{
-       return container_of(connector, struct vc4_vec_connector, base);
-}
-
  enum vc4_vec_tv_mode_id {
         VC4_VEC_TV_MODE_NTSC,
         VC4_VEC_TV_MODE_NTSC_J,
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c

index 5677263..483cd75 100644 (file)
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -485,7 +485,7 @@ static int adm9240_in_write(struct device *dev, u32 attr, int channel, long val)
                 reg = ADM9240_REG_IN_MIN(channel);
                 break;
         case hwmon_in_max:
-               reg = ADM9240_REG_IN(channel);
+               reg = ADM9240_REG_IN_MAX(channel);
                 break;
         default:
                 return -EOPNOTSUPP;
diff --git a/drivers/hwmon/corsair-psu.c b/drivers/hwmon/corsair-psu.c

index 3a5807e..02298b8 100644 (file)
--- a/drivers/hwmon/corsair-psu.c
+++ b/drivers/hwmon/corsair-psu.c
@@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p
                 return 0444;
         default:
                 return 0;
-       };
+       }
  }
  
  static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr,
@@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv
                 break;
         default:
                 break;
-       };
+       }
  
         return res;
  }
diff --git a/drivers/hwmon/ltc2992.c b/drivers/hwmon/ltc2992.c

index 4382105..2a4bed0 100644 (file)
--- a/drivers/hwmon/ltc2992.c
+++ b/drivers/hwmon/ltc2992.c
@@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st)
  
         fwnode_for_each_available_child_node(fwnode, child) {
                 ret = fwnode_property_read_u32(child, "reg", &addr);
-               if (ret < 0)
+               if (ret < 0) {
+                       fwnode_handle_put(child);
                         return ret;
+               }
  
-               if (addr > 1)
+               if (addr > 1) {
+                       fwnode_handle_put(child);
                         return -EINVAL;
+               }
  
                 ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val);
                 if (!ret)
diff --git a/drivers/hwmon/occ/common.c b/drivers/hwmon/occ/common.c

index f1ac153..967532a 100644 (file)
--- a/drivers/hwmon/occ/common.c
+++ b/drivers/hwmon/occ/common.c
@@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ)
                 return rc;
  
         /* limit the maximum rate of polling the OCC */
-       if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) {
+       if (time_after(jiffies, occ->next_update)) {
                 rc = occ_poll(occ);
-               occ->last_update = jiffies;
+               occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
         } else {
                 rc = occ->last_error;
         }
@@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name)
                 return rc;
         }
  
+       occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
         occ_parse_poll_response(occ);
  
         rc = occ_setup_sensor_attrs(occ);
diff --git a/drivers/hwmon/occ/common.h b/drivers/hwmon/occ/common.h

index 67e6968..e6df719 100644 (file)
--- a/drivers/hwmon/occ/common.h
+++ b/drivers/hwmon/occ/common.h
@@ -99,7 +99,7 @@ struct occ {
         u8 poll_cmd_data;               /* to perform OCC poll command */
         int (*send_cmd)(struct occ *occ, u8 *cmd);
  
-       unsigned long last_update;
+       unsigned long next_update;
         struct mutex lock;              /* lock OCC access */
  
         struct device *hwmon;
diff --git a/drivers/hwmon/pmbus/fsp-3y.c b/drivers/hwmon/pmbus/fsp-3y.c

index b177987..e248424 100644 (file)
--- a/drivers/hwmon/pmbus/fsp-3y.c
+++ b/drivers/hwmon/pmbus/fsp-3y.c
@@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip)
                 case YH5151E_PAGE_12V_LOG:
                         return YH5151E_PAGE_12V_REAL;
                 case YH5151E_PAGE_5V_LOG:
-                       return YH5151E_PAGE_5V_LOG;
+                       return YH5151E_PAGE_5V_REAL;
                 case YH5151E_PAGE_3V3_LOG:
                         return YH5151E_PAGE_3V3_REAL;
                 }
@@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log)
  
  static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
  {
+       const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+       struct fsp3y_data *data = to_fsp3y_data(info);
         int rv;
  
+       /*
+        * YH5151-E outputs vout in linear11. The conversion is done when
+        * reading. Here, we have to inject pmbus_core with the correct
+        * exponent (it is -6).
+        */
+       if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE)
+               return 0x1A;
+
         rv = set_page(client, page);
         if (rv < 0)
                 return rv;
@@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
  
  static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg)
  {
+       const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+       struct fsp3y_data *data = to_fsp3y_data(info);
         int rv;
  
         /*
@@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase,
         if (rv < 0)
                 return rv;
  
-       return i2c_smbus_read_word_data(client, reg);
+       rv = i2c_smbus_read_word_data(client, reg);
+       if (rv < 0)
+               return rv;
+
+       /*
+        * YH-5151E is non-compliant and outputs output voltages in linear11
+        * instead of linear16.
+        */
+       if (data->chip == yh5151e && reg == PMBUS_READ_VOUT)
+               rv = sign_extend32(rv, 10) & 0xffff;
+
+       return rv;
  }
  
  static struct pmbus_driver_info fsp3y_info[] = {
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c

index 522c9b2..762125f 100644 (file)
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2901,7 +2901,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
                 ctrl->hmmaxd = le16_to_cpu(id->hmmaxd);
         }
  
-       ret = nvme_mpath_init(ctrl, id);
+       ret = nvme_mpath_init_identify(ctrl, id);
         if (ret < 0)
                 goto out_free;
  
@@ -4364,6 +4364,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
                 min(default_ps_max_latency_us, (unsigned long)S32_MAX));
  
         nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
+       nvme_mpath_init_ctrl(ctrl);
  
         return 0;
  out_free_name:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c

index 0551796..f81871c 100644 (file)
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -781,9 +781,18 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head)
         put_disk(head->disk);
  }
  
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
  {
-       int error;
+       mutex_init(&ctrl->ana_lock);
+       timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
+       INIT_WORK(&ctrl->ana_work, nvme_ana_work);
+}
+
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
+{
+       size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
+       size_t ana_log_size;
+       int error = 0;
  
         /* check if multipath is enabled and we have the capability */
         if (!multipath || !ctrl->subsys ||
@@ -795,37 +804,31 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
         ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
         ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);
  
-       mutex_init(&ctrl->ana_lock);
-       timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
-       ctrl->ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
-               ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc);
-       ctrl->ana_log_size += ctrl->max_namespaces * sizeof(__le32);
-
-       if (ctrl->ana_log_size > ctrl->max_hw_sectors << SECTOR_SHIFT) {
+       ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
+               ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
+               ctrl->max_namespaces * sizeof(__le32);
+       if (ana_log_size > max_transfer_size) {
                 dev_err(ctrl->device,
-                       "ANA log page size (%zd) larger than MDTS (%d).\n",
-                       ctrl->ana_log_size,
-                       ctrl->max_hw_sectors << SECTOR_SHIFT);
+                       "ANA log page size (%zd) larger than MDTS (%zd).\n",
+                       ana_log_size, max_transfer_size);
                 dev_err(ctrl->device, "disabling ANA support.\n");
-               return 0;
+               goto out_uninit;
         }
-
-       INIT_WORK(&ctrl->ana_work, nvme_ana_work);
-       kfree(ctrl->ana_log_buf);
-       ctrl->ana_log_buf = kmalloc(ctrl->ana_log_size, GFP_KERNEL);
-       if (!ctrl->ana_log_buf) {
-               error = -ENOMEM;
-               goto out;
+       if (ana_log_size > ctrl->ana_log_size) {
+               nvme_mpath_stop(ctrl);
+               kfree(ctrl->ana_log_buf);
+               ctrl->ana_log_buf = kmalloc(ana_log_size, GFP_KERNEL);
+               if (!ctrl->ana_log_buf)
+                       return -ENOMEM;
         }
-
+       ctrl->ana_log_size = ana_log_size;
         error = nvme_read_ana_log(ctrl);
         if (error)
-               goto out_free_ana_log_buf;
+               goto out_uninit;
         return 0;
-out_free_ana_log_buf:
-       kfree(ctrl->ana_log_buf);
-       ctrl->ana_log_buf = NULL;
-out:
+
+out_uninit:
+       nvme_mpath_uninit(ctrl);
         return error;
  }
  
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h

index 05f31a2..0015860 100644 (file)
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -712,7 +712,8 @@ void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
  int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head);
  void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
  void nvme_mpath_remove_disk(struct nvme_ns_head *head);
-int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
+void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl);
  void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
  void nvme_mpath_stop(struct nvme_ctrl *ctrl);
  bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
@@ -780,7 +781,10 @@ static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
  static inline void nvme_trace_bio_complete(struct request *req)
  {
  }
-static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
+static inline void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
+{
+}
+static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl,
                 struct nvme_id_ctrl *id)
  {
         if (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c

index e7a367c..dcd49a7 100644 (file)
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -975,10 +975,7 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req)
         case nvme_admin_keep_alive:
                 req->execute = nvmet_execute_keep_alive;
                 return 0;
+       default:
+               return nvmet_report_invalid_opcode(req);
         }
-
-       pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
-              req->sq->qid);
-       req->error_loc = offsetof(struct nvme_common_command, opcode);
-       return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
  }
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c

index 4845d12..fc3645f 100644 (file)
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -379,7 +379,7 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req)
                 req->execute = nvmet_execute_disc_identify;
                 return 0;
         default:
-               pr_err("unhandled cmd %d\n", cmd->common.opcode);
+               pr_debug("unhandled cmd %d\n", cmd->common.opcode);
                 req->error_loc = offsetof(struct nvme_common_command, opcode);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
         }
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c

index 1420a8e..7d0f352 100644 (file)
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -94,7 +94,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req)
                 req->execute = nvmet_execute_prop_get;
                 break;
         default:
-               pr_err("received unknown capsule type 0x%x\n",
+               pr_debug("received unknown capsule type 0x%x\n",
                         cmd->fabrics.fctype);
                 req->error_loc = offsetof(struct nvmf_common_command, fctype);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
@@ -284,13 +284,13 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req)
         struct nvme_command *cmd = req->cmd;
  
         if (!nvme_is_fabrics(cmd)) {
-               pr_err("invalid command 0x%x on unconnected queue.\n",
+               pr_debug("invalid command 0x%x on unconnected queue.\n",
                         cmd->fabrics.opcode);
                 req->error_loc = offsetof(struct nvme_common_command, opcode);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
         }
         if (cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-               pr_err("invalid capsule type 0x%x on unconnected queue.\n",
+               pr_debug("invalid capsule type 0x%x on unconnected queue.\n",
                         cmd->fabrics.fctype);
                 req->error_loc = offsetof(struct nvmf_common_command, fctype);
                 return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c

index 9a8b372..429263c 100644 (file)
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -258,7 +258,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
  
         sector = nvmet_lba_to_sect(req->ns, req->cmd->rw.slba);
  
-       if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+       if (nvmet_use_inline_bvec(req)) {
                 bio = &req->b.inline_bio;
                 bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
         } else {
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c

index 715d437..7fdbdc4 100644 (file)
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,9 +49,11 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
  
         ns->file = filp_open(ns->device_path, flags, 0);
         if (IS_ERR(ns->file)) {
-               pr_err("failed to open file %s: (%ld)\n",
-                               ns->device_path, PTR_ERR(ns->file));
-               return PTR_ERR(ns->file);
+               ret = PTR_ERR(ns->file);
+               pr_err("failed to open file %s: (%d)\n",
+                       ns->device_path, ret);
+               ns->file = NULL;
+               return ret;
         }
  
         ret = nvmet_file_ns_revalidate(ns);
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h

index 5566ed4..d69a409 100644 (file)
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -616,4 +616,10 @@ static inline sector_t nvmet_lba_to_sect(struct nvmet_ns *ns, __le64 lba)
         return le64_to_cpu(lba) << (ns->blksize_shift - SECTOR_SHIFT);
  }
  
+static inline bool nvmet_use_inline_bvec(struct nvmet_req *req)
+{
+       return req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN &&
+              req->sg_cnt <= NVMET_MAX_INLINE_BIOVEC;
+}
+
  #endif /* _NVMET_H */
diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c

index 2798944..39b1473 100644 (file)
--- a/drivers/nvme/target/passthru.c
+++ b/drivers/nvme/target/passthru.c
@@ -194,7 +194,7 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
         if (req->sg_cnt > BIO_MAX_VECS)
                 return -EINVAL;
  
-       if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
+       if (nvmet_use_inline_bvec(req)) {
                 bio = &req->p.inline_bio;
                 bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
         } else {
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c

index 6c1f3ab..7d607f4 100644 (file)
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -700,7 +700,7 @@ static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct nvmet_rdma_rsp *rsp =
                 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, send_cqe);
-       struct nvmet_rdma_queue *queue = cq->cq_context;
+       struct nvmet_rdma_queue *queue = wc->qp->qp_context;
  
         nvmet_rdma_release_rsp(rsp);
  
@@ -786,7 +786,7 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
  {
         struct nvmet_rdma_rsp *rsp =
                 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
-       struct nvmet_rdma_queue *queue = cq->cq_context;
+       struct nvmet_rdma_queue *queue = wc->qp->qp_context;
         struct rdma_cm_id *cm_id = rsp->queue->cm_id;
         u16 status;
  
diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c

index 01645e8..fa1548d 100644 (file)
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -1171,7 +1171,7 @@ static inline int resize_screen(struct vc_data *vc, int width, int height,
         /* Resizes the resolution of the display adapater */
         int err = 0;
  
-       if (vc->vc_mode != KD_GRAPHICS && vc->vc_sw->con_resize)
+       if (vc->vc_sw->con_resize)
                 err = vc->vc_sw->con_resize(vc, width, height, user);
  
         return err;
diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c

index 89aeaf3..0e0cd9e 100644 (file)
--- a/drivers/tty/vt/vt_ioctl.c
+++ b/drivers/tty/vt/vt_ioctl.c
@@ -671,21 +671,58 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
         if (copy_from_user(&v, cs, sizeof(struct vt_consize)))
                 return -EFAULT;
  
-       if (v.v_vlin)
-               pr_info_once("\"struct vt_consize\"->v_vlin is ignored. Please report if you need this.\n");
-       if (v.v_clin)
-               pr_info_once("\"struct vt_consize\"->v_clin is ignored. Please report if you need this.\n");
+       /* FIXME: Should check the copies properly */
+       if (!v.v_vlin)
+               v.v_vlin = vc->vc_scan_lines;
+
+       if (v.v_clin) {
+               int rows = v.v_vlin / v.v_clin;
+               if (v.v_rows != rows) {
+                       if (v.v_rows) /* Parameters don't add up */
+                               return -EINVAL;
+                       v.v_rows = rows;
+               }
+       }
+
+       if (v.v_vcol && v.v_ccol) {
+               int cols = v.v_vcol / v.v_ccol;
+               if (v.v_cols != cols) {
+                       if (v.v_cols)
+                               return -EINVAL;
+                       v.v_cols = cols;
+               }
+       }
+
+       if (v.v_clin > 32)
+               return -EINVAL;
  
-       console_lock();
         for (i = 0; i < MAX_NR_CONSOLES; i++) {
-               vc = vc_cons[i].d;
+               struct vc_data *vcp;
  
-               if (vc) {
-                       vc->vc_resize_user = 1;
-                       vc_resize(vc, v.v_cols, v.v_rows);
+               if (!vc_cons[i].d)
+                       continue;
+               console_lock();
+               vcp = vc_cons[i].d;
+               if (vcp) {
+                       int ret;
+                       int save_scan_lines = vcp->vc_scan_lines;
+                       int save_cell_height = vcp->vc_cell_height;
+
+                       if (v.v_vlin)
+                               vcp->vc_scan_lines = v.v_vlin;
+                       if (v.v_clin)
+                               vcp->vc_cell_height = v.v_clin;
+                       vcp->vc_resize_user = 1;
+                       ret = vc_resize(vcp, v.v_cols, v.v_rows);
+                       if (ret) {
+                               vcp->vc_scan_lines = save_scan_lines;
+                               vcp->vc_cell_height = save_cell_height;
+                               console_unlock();
+                               return ret;
+                       }
                 }
+               console_unlock();
         }
-       console_unlock();
  
         return 0;
  }
diff --git a/drivers/video/console/vgacon.c b/drivers/video/console/vgacon.c

index 39258f9..ef9c57c 100644 (file)
--- a/drivers/video/console/vgacon.c
+++ b/drivers/video/console/vgacon.c
@@ -380,7 +380,7 @@ static void vgacon_init(struct vc_data *c, int init)
                 vc_resize(c, vga_video_num_columns, vga_video_num_lines);
  
         c->vc_scan_lines = vga_scan_lines;
-       c->vc_font.height = vga_video_font_height;
+       c->vc_font.height = c->vc_cell_height = vga_video_font_height;
         c->vc_complement_mask = 0x7700;
         if (vga_512_chars)
                 c->vc_hi_font_mask = 0x0800;
@@ -515,32 +515,32 @@ static void vgacon_cursor(struct vc_data *c, int mode)
                 switch (CUR_SIZE(c->vc_cursor_type)) {
                 case CUR_UNDERLINE:
                         vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                 10 ? 2 : 3),
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                 10 ? 1 : 2));
                         break;
                 case CUR_TWO_THIRDS:
                         vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height / 3,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height / 3,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                 10 ? 1 : 2));
                         break;
                 case CUR_LOWER_THIRD:
                         vgacon_set_cursor_size(c->state.x,
-                                              (c->vc_font.height * 2) / 3,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              (c->vc_cell_height * 2) / 3,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                 10 ? 1 : 2));
                         break;
                 case CUR_LOWER_HALF:
                         vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height / 2,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height / 2,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                 10 ? 1 : 2));
                         break;
                 case CUR_NONE:
@@ -551,7 +551,7 @@ static void vgacon_cursor(struct vc_data *c, int mode)
                         break;
                 default:
                         vgacon_set_cursor_size(c->state.x, 1,
-                                              c->vc_font.height);
+                                              c->vc_cell_height);
                         break;
                 }
                 break;
@@ -562,13 +562,13 @@ static int vgacon_doresize(struct vc_data *c,
                 unsigned int width, unsigned int height)
  {
         unsigned long flags;
-       unsigned int scanlines = height * c->vc_font.height;
+       unsigned int scanlines = height * c->vc_cell_height;
         u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan;
  
         raw_spin_lock_irqsave(&vga_lock, flags);
  
         vgacon_xres = width * VGA_FONTWIDTH;
-       vgacon_yres = height * c->vc_font.height;
+       vgacon_yres = height * c->vc_cell_height;
         if (vga_video_type >= VIDEO_TYPE_VGAC) {
                 outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
                 max_scan = inb_p(vga_video_port_val);
@@ -623,9 +623,9 @@ static int vgacon_doresize(struct vc_data *c,
  static int vgacon_switch(struct vc_data *c)
  {
         int x = c->vc_cols * VGA_FONTWIDTH;
-       int y = c->vc_rows * c->vc_font.height;
+       int y = c->vc_rows * c->vc_cell_height;
         int rows = screen_info.orig_video_lines * vga_default_font_height/
-               c->vc_font.height;
+               c->vc_cell_height;
         /*
          * We need to save screen size here as it's the only way
          * we can spot the screen has been resized and we need to
@@ -1038,7 +1038,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight)
                                 cursor_size_lastto = 0;
                                 c->vc_sw->con_cursor(c, CM_DRAW);
                         }
-                       c->vc_font.height = fontheight;
+                       c->vc_font.height = c->vc_cell_height = fontheight;
                         vc_resize(c, 0, rows);  /* Adjust console size */
                 }
         }
@@ -1086,12 +1086,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
         if ((width << 1) * height > vga_vram_size)
                 return -EINVAL;
  
+       if (user) {
+               /*
+                * Ho ho!  Someone (svgatextmode, eh?) may have reprogrammed
+                * the video mode!  Set the new defaults then and go away.
+                */
+               screen_info.orig_video_cols = width;
+               screen_info.orig_video_lines = height;
+               vga_default_font_height = c->vc_cell_height;
+               return 0;
+       }
         if (width % 2 || width > screen_info.orig_video_cols ||
             height > (screen_info.orig_video_lines * vga_default_font_height)/
-           c->vc_font.height)
-               /* let svgatextmode tinker with video timings and
-                  return success */
-               return (user) ? 0 : -EINVAL;
+           c->vc_cell_height)
+               return -EINVAL;
  
         if (con_is_visible(c) && !vga_is_gfx) /* who knows */
                 vgacon_doresize(c, width, height);
diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c

index 3406067..22bb389 100644 (file)
--- a/drivers/video/fbdev/core/fbcon.c
+++ b/drivers/video/fbdev/core/fbcon.c
@@ -2019,7 +2019,7 @@ static int fbcon_resize(struct vc_data *vc, unsigned int width,
                         return -EINVAL;
  
                 pr_debug("resize now %ix%i\n", var.xres, var.yres);
-               if (con_is_visible(vc)) {
+               if (con_is_visible(vc) && vc->vc_mode == KD_TEXT) {
                         var.activate = FB_ACTIVATE_NOW |
                                 FB_ACTIVATE_FORCE;
                         fb_set_var(info, &var);
diff --git a/drivers/xen/gntdev.c b/drivers/xen/gntdev.c

index f01d58c..a3e7be9 100644 (file)
--- a/drivers/xen/gntdev.c
+++ b/drivers/xen/gntdev.c
@@ -1017,8 +1017,10 @@ static int gntdev_mmap(struct file *flip, struct vm_area_struct *vma)
                 err = mmu_interval_notifier_insert_locked(
                         &map->notifier, vma->vm_mm, vma->vm_start,
                         vma->vm_end - vma->vm_start, &gntdev_mmu_ops);
-               if (err)
+               if (err) {
+                       map->vma = NULL;
                         goto out_unlock_put;
+               }
         }
         mutex_unlock(&priv->lock);
  
diff --git a/drivers/xen/swiotlb-xen.c b/drivers/xen/swiotlb-xen.c

index 4c89afc..24d1186 100644 (file)
--- a/drivers/xen/swiotlb-xen.c
+++ b/drivers/xen/swiotlb-xen.c
@@ -164,6 +164,11 @@ int __ref xen_swiotlb_init(void)
         int rc = -ENOMEM;
         char *start;
  
+       if (io_tlb_default_mem != NULL) {
+               pr_warn("swiotlb buffer already initialized\n");
+               return -EEXIST;
+       }
+
  retry:
         m_ret = XEN_SWIOTLB_ENOMEM;
         order = get_order(bytes);
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c

index e64e6be..87e6b7d 100644 (file)
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -39,8 +39,10 @@ static int fill_list(unsigned int nr_pages)
         }
  
         pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
-       if (!pgmap)
+       if (!pgmap) {
+               ret = -ENOMEM;
                 goto err_pgmap;
+       }
  
         pgmap->type = MEMORY_DEVICE_GENERIC;
         pgmap->range = (struct range) {
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h

index f83fd3c..9fb7682 100644 (file)
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                                struct btrfs_inode *inode, u64 new_size,
                                u32 min_type);
  
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
  int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
                                bool in_reclaim_context);
  int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c

index 7a28314..f1d15b6 100644 (file)
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
                 stripe = bbio->stripes;
                 for (i = 0; i < bbio->num_stripes; i++, stripe++) {
                         u64 bytes;
+                       struct btrfs_device *device = stripe->dev;
  
-                       if (!stripe->dev->bdev) {
+                       if (!device->bdev) {
                                 ASSERT(btrfs_test_opt(fs_info, DEGRADED));
                                 continue;
                         }
  
+                       if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+                               continue;
+
                         ret = do_discard_extent(stripe, &bytes);
                         if (!ret) {
                                 discarded_bytes += bytes;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c

index 864c08d..3b10d98 100644 (file)
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
         return ret;
  }
  
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+       if (btrfs_inode_in_log(inode, fs_info->generation) &&
+           list_empty(&ctx->ordered_extents))
+               return true;
+
+       /*
+        * If we are doing a fast fsync we can not bail out if the inode's
+        * last_trans is <= than the last committed transaction, because we only
+        * update the last_trans of the inode during ordered extent completion,
+        * and for a fast fsync we don't wait for that, we only wait for the
+        * writeback to complete.
+        */
+       if (inode->last_trans <= fs_info->last_trans_committed &&
+           (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+            list_empty(&ctx->ordered_extents)))
+               return true;
+
+       return false;
+}
+
  /*
   * fsync call for both files and directories.  This logs the inode into
   * the tree log instead of forcing full commits whenever possible.
@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
  
         atomic_inc(&root->log_batch);
  
-       /*
-        * If we are doing a fast fsync we can not bail out if the inode's
-        * last_trans is <= then the last committed transaction, because we only
-        * update the last_trans of the inode during ordered extent completion,
-        * and for a fast fsync we don't wait for that, we only wait for the
-        * writeback to complete.
-        */
         smp_mb();
-       if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
-           (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
-            (full_sync || list_empty(&ctx.ordered_extents)))) {
+       if (skip_inode_logging(&ctx)) {
                 /*
                  * We've had everything committed since the last time we were
                  * modified so clear this flag in case it was set for whatever
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c

index e54466f..4806295 100644 (file)
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
  {
         struct btrfs_block_group *block_group;
         struct rb_node *node;
-       int ret;
+       int ret = 0;
  
         btrfs_info(fs_info, "cleaning free space cache v1");
  
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index 4af3360..eb6fddf 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -9678,7 +9678,7 @@ out:
         return ret;
  }
  
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
  {
         struct writeback_control wbc = {
                 .nr_to_write = LONG_MAX,
@@ -9691,7 +9691,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
         if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                 return -EROFS;
  
-       return start_delalloc_inodes(root, &wbc, true, false);
+       return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
  }
  
  int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index ee1dbab..5dc2fd8 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
         if (!fa->flags_valid) {
                 /* 1 item for the inode */
                 trans = btrfs_start_transaction(root, 1);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
                 goto update_flags;
         }
  
@@ -907,7 +909,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
          */
         btrfs_drew_read_lock(&root->snapshot_lock);
  
-       ret = btrfs_start_delalloc_snapshot(root);
+       ret = btrfs_start_delalloc_snapshot(root, false);
         if (ret)
                 goto out;
  
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c

index 07b0b42..6c413bb 100644 (file)
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
  
         if (pre)
                 ret = clone_ordered_extent(ordered, 0, pre);
-       if (post)
+       if (ret == 0 && post)
                 ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
                                            post);
  
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c

index 2319c92..3ded812 100644 (file)
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root)
         struct btrfs_trans_handle *trans;
         int ret;
  
-       /* Can't hold an open transaction or we run the risk of deadlocking */
-       ASSERT(current->journal_info == NULL ||
-              current->journal_info == BTRFS_SEND_TRANS_STUB);
-       if (WARN_ON(current->journal_info &&
-                   current->journal_info != BTRFS_SEND_TRANS_STUB))
+       /*
+        * Can't hold an open transaction or we run the risk of deadlocking,
+        * and can't either be under the context of a send operation (where
+        * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
+        * would result in a crash when starting a transaction and does not
+        * make sense either (send is a read-only operation).
+        */
+       ASSERT(current->journal_info == NULL);
+       if (WARN_ON(current->journal_info))
                 return 0;
  
         /*
@@ -3562,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
                 return 0;
         }
  
-       ret = btrfs_start_delalloc_snapshot(root);
+       ret = btrfs_start_delalloc_snapshot(root, true);
         if (ret < 0)
                 goto out;
         btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c

index 55741ad..bd69db7 100644 (file)
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
         int i;
  
         if (root) {
-               ret = btrfs_start_delalloc_snapshot(root);
+               ret = btrfs_start_delalloc_snapshot(root, false);
                 if (ret)
                         return ret;
                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
  
         for (i = 0; i < sctx->clone_roots_cnt; i++) {
                 root = sctx->clone_roots[i].root;
-               ret = btrfs_start_delalloc_snapshot(root);
+               ret = btrfs_start_delalloc_snapshot(root, false);
                 if (ret)
                         return ret;
                 btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c

index f67721d..95a6000 100644 (file)
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -6061,7 +6061,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
          * (since logging them is pointless, a link count of 0 means they
          * will never be accessible).
          */
-       if (btrfs_inode_in_log(inode, trans->transid) ||
+       if ((btrfs_inode_in_log(inode, trans->transid) &&
+            list_empty(&ctx->ordered_extents)) ||
             inode->vfs_inode.i_nlink == 0) {
                 ret = BTRFS_NO_LOG_SYNC;
                 goto end_no_trans;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c

index 70b23a0..304ce64 100644 (file)
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                         goto out;
                 }
  
+               if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+                       ret = -EIO;
+                       goto out;
+               }
+
                 switch (zone.cond) {
                 case BLK_ZONE_COND_OFFLINE:
                 case BLK_ZONE_COND_READONLY:
diff --git a/fs/dax.c b/fs/dax.c

index 6921624..62352cb 100644 (file)
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -144,6 +144,16 @@ struct wait_exceptional_entry_queue {
         struct exceptional_entry_key key;
  };
  
+/**
+ * enum dax_wake_mode: waitqueue wakeup behaviour
+ * @WAKE_ALL: wake all waiters in the waitqueue
+ * @WAKE_NEXT: wake only the first waiter in the waitqueue
+ */
+enum dax_wake_mode {
+       WAKE_ALL,
+       WAKE_NEXT,
+};
+
  static wait_queue_head_t *dax_entry_waitqueue(struct xa_state *xas,
                 void *entry, struct exceptional_entry_key *key)
  {
@@ -182,7 +192,8 @@ static int wake_exceptional_entry_func(wait_queue_entry_t *wait,
   * The important information it's conveying is whether the entry at
   * this index used to be a PMD entry.
   */
-static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
+static void dax_wake_entry(struct xa_state *xas, void *entry,
+                          enum dax_wake_mode mode)
  {
         struct exceptional_entry_key key;
         wait_queue_head_t *wq;
@@ -196,7 +207,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all)
          * must be in the waitqueue and the following check will see them.
          */
         if (waitqueue_active(wq))
-               __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key);
+               __wake_up(wq, TASK_NORMAL, mode == WAKE_ALL ? 0 : 1, &key);
  }
  
  /*
@@ -264,11 +275,11 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
         finish_wait(wq, &ewait.wait);
  }
  
-static void put_unlocked_entry(struct xa_state *xas, void *entry)
+static void put_unlocked_entry(struct xa_state *xas, void *entry,
+                              enum dax_wake_mode mode)
  {
-       /* If we were the only waiter woken, wake the next one */
         if (entry && !dax_is_conflict(entry))
-               dax_wake_entry(xas, entry, false);
+               dax_wake_entry(xas, entry, mode);
  }
  
  /*
@@ -286,7 +297,7 @@ static void dax_unlock_entry(struct xa_state *xas, void *entry)
         old = xas_store(xas, entry);
         xas_unlock_irq(xas);
         BUG_ON(!dax_is_locked(old));
-       dax_wake_entry(xas, entry, false);
+       dax_wake_entry(xas, entry, WAKE_NEXT);
  }
  
  /*
@@ -524,7 +535,7 @@ retry:
  
                 dax_disassociate_entry(entry, mapping, false);
                 xas_store(xas, NULL);   /* undo the PMD join */
-               dax_wake_entry(xas, entry, true);
+               dax_wake_entry(xas, entry, WAKE_ALL);
                 mapping->nrpages -= PG_PMD_NR;
                 entry = NULL;
                 xas_set(xas, index);
@@ -622,7 +633,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
                         entry = get_unlocked_entry(&xas, 0);
                 if (entry)
                         page = dax_busy_page(entry);
-               put_unlocked_entry(&xas, entry);
+               put_unlocked_entry(&xas, entry, WAKE_NEXT);
                 if (page)
                         break;
                 if (++scanned % XA_CHECK_SCHED)
@@ -664,7 +675,7 @@ static int __dax_invalidate_entry(struct address_space *mapping,
         mapping->nrpages -= 1UL << dax_entry_order(entry);
         ret = 1;
  out:
-       put_unlocked_entry(&xas, entry);
+       put_unlocked_entry(&xas, entry, WAKE_ALL);
         xas_unlock_irq(&xas);
         return ret;
  }
@@ -937,13 +948,13 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
         xas_lock_irq(xas);
         xas_store(xas, entry);
         xas_clear_mark(xas, PAGECACHE_TAG_DIRTY);
-       dax_wake_entry(xas, entry, false);
+       dax_wake_entry(xas, entry, WAKE_NEXT);
  
         trace_dax_writeback_one(mapping->host, index, count);
         return ret;
  
   put_unlocked:
-       put_unlocked_entry(xas, entry);
+       put_unlocked_entry(xas, entry, WAKE_NEXT);
         return ret;
  }
  
@@ -1684,7 +1695,7 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
         /* Did we race with someone splitting entry or so? */
         if (!entry || dax_is_conflict(entry) ||
             (order == 0 && !dax_is_pte_entry(entry))) {
-               put_unlocked_entry(&xas, entry);
+               put_unlocked_entry(&xas, entry, WAKE_NEXT);
                 xas_unlock_irq(&xas);
                 trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf,
                                                       VM_FAULT_NOPAGE);
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c

index e62d813..efaf325 100644 (file)
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -450,14 +450,31 @@ static int z_erofs_get_extent_compressedlen(struct z_erofs_maprecorder *m,
         lcn = m->lcn + 1;
         if (m->compressedlcs)
                 goto out;
-       if (lcn == initial_lcn)
-               goto err_bonus_cblkcnt;
  
         err = z_erofs_load_cluster_from_disk(m, lcn);
         if (err)
                 return err;
  
+       /*
+        * If the 1st NONHEAD lcluster has already been handled initially w/o
+        * valid compressedlcs, which means at least it mustn't be CBLKCNT, or
+        * an internal implementation error is detected.
+        *
+        * The following code can also handle it properly anyway, but let's
+        * BUG_ON in the debugging mode only for developers to notice that.
+        */
+       DBG_BUGON(lcn == initial_lcn &&
+                 m->type == Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD);
+
         switch (m->type) {
+       case Z_EROFS_VLE_CLUSTER_TYPE_PLAIN:
+       case Z_EROFS_VLE_CLUSTER_TYPE_HEAD:
+               /*
+                * if the 1st NONHEAD lcluster is actually PLAIN or HEAD type
+                * rather than CBLKCNT, it's a 1 lcluster-sized pcluster.
+                */
+               m->compressedlcs = 1;
+               break;
         case Z_EROFS_VLE_CLUSTER_TYPE_NONHEAD:
                 if (m->delta[0] != 1)
                         goto err_bonus_cblkcnt;
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c

index 53b1378..925a5ca 100644 (file)
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -117,19 +117,6 @@ static void f2fs_unlock_rpages(struct compress_ctx *cc, int len)
         f2fs_drop_rpages(cc, len, true);
  }
  
-static void f2fs_put_rpages_mapping(struct address_space *mapping,
-                               pgoff_t start, int len)
-{
-       int i;
-
-       for (i = 0; i < len; i++) {
-               struct page *page = find_get_page(mapping, start + i);
-
-               put_page(page);
-               put_page(page);
-       }
-}
-
  static void f2fs_put_rpages_wbc(struct compress_ctx *cc,
                 struct writeback_control *wbc, bool redirty, int unlock)
  {
@@ -158,13 +145,14 @@ int f2fs_init_compress_ctx(struct compress_ctx *cc)
         return cc->rpages ? 0 : -ENOMEM;
  }
  
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse)
  {
         page_array_free(cc->inode, cc->rpages, cc->cluster_size);
         cc->rpages = NULL;
         cc->nr_rpages = 0;
         cc->nr_cpages = 0;
-       cc->cluster_idx = NULL_CLUSTER;
+       if (!reuse)
+               cc->cluster_idx = NULL_CLUSTER;
  }
  
  void f2fs_compress_ctx_add_page(struct compress_ctx *cc, struct page *page)
@@ -1036,7 +1024,7 @@ retry:
                 }
  
                 if (PageUptodate(page))
-                       unlock_page(page);
+                       f2fs_put_page(page, 1);
                 else
                         f2fs_compress_ctx_add_page(cc, page);
         }
@@ -1046,33 +1034,35 @@ retry:
  
                 ret = f2fs_read_multi_pages(cc, &bio, cc->cluster_size,
                                         &last_block_in_bio, false, true);
-               f2fs_destroy_compress_ctx(cc);
+               f2fs_put_rpages(cc);
+               f2fs_destroy_compress_ctx(cc, true);
                 if (ret)
-                       goto release_pages;
+                       goto out;
                 if (bio)
                         f2fs_submit_bio(sbi, bio, DATA);
  
                 ret = f2fs_init_compress_ctx(cc);
                 if (ret)
-                       goto release_pages;
+                       goto out;
         }
  
         for (i = 0; i < cc->cluster_size; i++) {
                 f2fs_bug_on(sbi, cc->rpages[i]);
  
                 page = find_lock_page(mapping, start_idx + i);
-               f2fs_bug_on(sbi, !page);
+               if (!page) {
+                       /* page can be truncated */
+                       goto release_and_retry;
+               }
  
                 f2fs_wait_on_page_writeback(page, DATA, true, true);
-
                 f2fs_compress_ctx_add_page(cc, page);
-               f2fs_put_page(page, 0);
  
                 if (!PageUptodate(page)) {
+release_and_retry:
+                       f2fs_put_rpages(cc);
                         f2fs_unlock_rpages(cc, i + 1);
-                       f2fs_put_rpages_mapping(mapping, start_idx,
-                                       cc->cluster_size);
-                       f2fs_destroy_compress_ctx(cc);
+                       f2fs_destroy_compress_ctx(cc, true);
                         goto retry;
                 }
         }
@@ -1103,10 +1093,10 @@ retry:
         }
  
  unlock_pages:
+       f2fs_put_rpages(cc);
         f2fs_unlock_rpages(cc, i);
-release_pages:
-       f2fs_put_rpages_mapping(mapping, start_idx, i);
-       f2fs_destroy_compress_ctx(cc);
+       f2fs_destroy_compress_ctx(cc, true);
+out:
         return ret;
  }
  
@@ -1141,7 +1131,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
                 set_cluster_dirty(&cc);
  
         f2fs_put_rpages_wbc(&cc, NULL, false, 1);
-       f2fs_destroy_compress_ctx(&cc);
+       f2fs_destroy_compress_ctx(&cc, false);
  
         return first_index;
  }
@@ -1361,7 +1351,7 @@ unlock_continue:
         f2fs_put_rpages(cc);
         page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
         cc->cpages = NULL;
-       f2fs_destroy_compress_ctx(cc);
+       f2fs_destroy_compress_ctx(cc, false);
         return 0;
  
  out_destroy_crypt:
@@ -1372,7 +1362,8 @@ out_destroy_crypt:
         for (i = 0; i < cc->nr_cpages; i++) {
                 if (!cc->cpages[i])
                         continue;
-               f2fs_put_page(cc->cpages[i], 1);
+               f2fs_compress_free_page(cc->cpages[i]);
+               cc->cpages[i] = NULL;
         }
  out_put_cic:
         kmem_cache_free(cic_entry_slab, cic);
@@ -1522,7 +1513,7 @@ write:
         err = f2fs_write_raw_pages(cc, submitted, wbc, io_type);
         f2fs_put_rpages_wbc(cc, wbc, false, 0);
  destroy_out:
-       f2fs_destroy_compress_ctx(cc);
+       f2fs_destroy_compress_ctx(cc, false);
         return err;
  }
  
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c

index 96f1a35..009a09f 100644 (file)
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -2287,7 +2287,7 @@ static int f2fs_mpage_readpages(struct inode *inode,
                                                         max_nr_pages,
                                                         &last_block_in_bio,
                                                         rac != NULL, false);
-                               f2fs_destroy_compress_ctx(&cc);
+                               f2fs_destroy_compress_ctx(&cc, false);
                                 if (ret)
                                         goto set_error_page;
                         }
@@ -2332,7 +2332,7 @@ next_page:
                                                         max_nr_pages,
                                                         &last_block_in_bio,
                                                         rac != NULL, false);
-                               f2fs_destroy_compress_ctx(&cc);
+                               f2fs_destroy_compress_ctx(&cc, false);
                         }
                 }
  #endif
@@ -3033,7 +3033,7 @@ next:
                 }
         }
         if (f2fs_compressed_file(inode))
-               f2fs_destroy_compress_ctx(&cc);
+               f2fs_destroy_compress_ctx(&cc, false);
  #endif
         if (retry) {
                 index = 0;
@@ -3801,6 +3801,7 @@ static int f2fs_is_file_aligned(struct inode *inode)
         block_t pblock;
         unsigned long nr_pblocks;
         unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+       unsigned int not_aligned = 0;
         int ret = 0;
  
         cur_lblock = 0;
@@ -3833,13 +3834,20 @@ static int f2fs_is_file_aligned(struct inode *inode)
  
                 if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
                         nr_pblocks & (blocks_per_sec - 1)) {
-                       f2fs_err(sbi, "Swapfile does not align to section");
-                       ret = -EINVAL;
-                       goto out;
+                       if (f2fs_is_pinned_file(inode)) {
+                               f2fs_err(sbi, "Swapfile does not align to section");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       not_aligned++;
                 }
  
                 cur_lblock += nr_pblocks;
         }
+       if (not_aligned)
+               f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+                       "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+                       not_aligned);
  out:
         return ret;
  }
@@ -3858,6 +3866,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
         int nr_extents = 0;
         unsigned long nr_pblocks;
         unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+       unsigned int not_aligned = 0;
         int ret = 0;
  
         /*
@@ -3887,7 +3896,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
                 /* hole */
                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
                         f2fs_err(sbi, "Swapfile has holes\n");
-                       ret = -ENOENT;
+                       ret = -EINVAL;
                         goto out;
                 }
  
@@ -3896,9 +3905,12 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
  
                 if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
                                 nr_pblocks & (blocks_per_sec - 1)) {
-                       f2fs_err(sbi, "Swapfile does not align to section");
-                       ret = -EINVAL;
-                       goto out;
+                       if (f2fs_is_pinned_file(inode)) {
+                               f2fs_err(sbi, "Swapfile does not align to section");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       not_aligned++;
                 }
  
                 if (cur_lblock + nr_pblocks >= sis->max)
@@ -3927,6 +3939,11 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
         sis->max = cur_lblock;
         sis->pages = cur_lblock - 1;
         sis->highest_bit = cur_lblock - 1;
+
+       if (not_aligned)
+               f2fs_warn(sbi, "Swapfile (%u) is not align to section: \n"
+                       "\t1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate()",
+                       not_aligned);
  out:
         return ret;
  }
@@ -4035,7 +4052,7 @@ out:
         return ret;
  bad_bmap:
         f2fs_err(sbi, "Swapfile has holes\n");
-       return -ENOENT;
+       return -EINVAL;
  }
  
  static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h

index 0448788..c83d901 100644 (file)
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -3956,7 +3956,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc);
  void f2fs_decompress_end_io(struct decompress_io_ctx *dic, bool failed);
  void f2fs_put_page_dic(struct page *page);
  int f2fs_init_compress_ctx(struct compress_ctx *cc);
-void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
+void f2fs_destroy_compress_ctx(struct compress_ctx *cc, bool reuse);
  void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
  int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
  void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c

index 44a4650..ceb575f 100644 (file)
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1817,7 +1817,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
         struct f2fs_inode_info *fi = F2FS_I(inode);
         u32 masked_flags = fi->i_flags & mask;
  
-       f2fs_bug_on(F2FS_I_SB(inode), (iflags & ~mask));
+       /* mask can be shrunk by flags_valid selector */
+       iflags &= mask;
  
         /* Is it quota file? Do not allow user to mess with it */
         if (IS_NOQUOTA(inode))
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c

index c605415..51dc79f 100644 (file)
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3574,12 +3574,12 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
  
         return err;
  drop_bio:
-       if (fio->bio) {
+       if (fio->bio && *(fio->bio)) {
                 struct bio *bio = *(fio->bio);
  
                 bio->bi_status = BLK_STS_IOERR;
                 bio_endio(bio);
-               fio->bio = NULL;
+               *(fio->bio) = NULL;
         }
         return err;
  }
diff --git a/fs/hfsplus/extents.c b/fs/hfsplus/extents.c

index a930ddd..7054a54 100644 (file)
--- a/fs/hfsplus/extents.c
+++ b/fs/hfsplus/extents.c
@@ -598,13 +598,15 @@ void hfsplus_file_truncate(struct inode *inode)
                 res = __hfsplus_ext_cache_extent(&fd, inode, alloc_cnt);
                 if (res)
                         break;
-               hfs_brec_remove(&fd);
  
-               mutex_unlock(&fd.tree->tree_lock);
                 start = hip->cached_start;
+               if (blk_cnt <= start)
+                       hfs_brec_remove(&fd);
+               mutex_unlock(&fd.tree->tree_lock);
                 hfsplus_free_extents(sb, hip->cached_extents,
                                      alloc_cnt - start, alloc_cnt - blk_cnt);
                 hfsplus_dump_extent(hip->cached_extents);
+               mutex_lock(&fd.tree->tree_lock);
                 if (blk_cnt > start) {
                         hip->extent_state |= HFSPLUS_EXT_DIRTY;
                         break;
@@ -612,7 +614,6 @@ void hfsplus_file_truncate(struct inode *inode)
                 alloc_cnt = start;
                 hip->cached_start = hip->cached_blocks = 0;
                 hip->extent_state &= ~(HFSPLUS_EXT_DIRTY | HFSPLUS_EXT_NEW);
-               mutex_lock(&fd.tree->tree_lock);
         }
         hfs_find_exit(&fd);
  
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index a2a4233..9d9e009 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -131,6 +131,7 @@ static void huge_pagevec_release(struct pagevec *pvec)
  static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  {
         struct inode *inode = file_inode(file);
+       struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
         loff_t len, vma_len;
         int ret;
         struct hstate *h = hstate_file(file);
@@ -146,6 +147,10 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
         vma->vm_flags |= VM_HUGETLB | VM_DONTEXPAND;
         vma->vm_ops = &hugetlb_vm_ops;
  
+       ret = seal_check_future_write(info->seals, vma);
+       if (ret)
+               return ret;
+
         /*
          * page based offset in vm_pgoff could be sufficiently large to
          * overflow a loff_t when converted to byte offset.  This can
diff --git a/fs/io_uring.c b/fs/io_uring.c

index f46acbb..e481ac8 100644 (file)
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -100,6 +100,8 @@
  #define IORING_MAX_RESTRICTIONS        (IORING_RESTRICTION_LAST + \
                                  IORING_REGISTER_LAST + IORING_OP_LAST)
  
+#define IORING_MAX_REG_BUFFERS (1U << 14)
+
  #define SQE_VALID_FLAGS        (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
                                 IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
                                 IOSQE_BUFFER_SELECT)
@@ -4035,7 +4037,7 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
  #if defined(CONFIG_EPOLL)
         if (sqe->ioprio || sqe->buf_index)
                 return -EINVAL;
-       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
  
         req->epoll.epfd = READ_ONCE(sqe->fd);
@@ -4150,7 +4152,7 @@ static int io_fadvise(struct io_kiocb *req, unsigned int issue_flags)
  
  static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
  {
-       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL | IORING_SETUP_SQPOLL)))
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                 return -EINVAL;
         if (sqe->ioprio || sqe->buf_index)
                 return -EINVAL;
@@ -5827,8 +5829,6 @@ done:
  static int io_rsrc_update_prep(struct io_kiocb *req,
                                 const struct io_uring_sqe *sqe)
  {
-       if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
-               return -EINVAL;
         if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
                 return -EINVAL;
         if (sqe->ioprio || sqe->rw_flags)
@@ -6354,19 +6354,20 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
          * We don't expect the list to be empty, that will only happen if we
          * race with the completion of the linked work.
          */
-       if (prev && req_ref_inc_not_zero(prev))
+       if (prev) {
                 io_remove_next_linked(prev);
-       else
-               prev = NULL;
+               if (!req_ref_inc_not_zero(prev))
+                       prev = NULL;
+       }
         spin_unlock_irqrestore(&ctx->completion_lock, flags);
  
         if (prev) {
                 io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
                 io_put_req_deferred(prev, 1);
+               io_put_req_deferred(req, 1);
         } else {
                 io_req_complete_post(req, -ETIME, 0);
         }
-       io_put_req_deferred(req, 1);
         return HRTIMER_NORESTART;
  }
  
@@ -8390,7 +8391,7 @@ static int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
  
         if (ctx->user_bufs)
                 return -EBUSY;
-       if (!nr_args || nr_args > UIO_MAXIOV)
+       if (!nr_args || nr_args > IORING_MAX_REG_BUFFERS)
                 return -EINVAL;
         ret = io_rsrc_node_switch_start(ctx);
         if (ret)
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c

index f2cd203..9023717 100644 (file)
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -394,7 +394,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
  {
         struct inode *inode = rac->mapping->host;
         loff_t pos = readahead_pos(rac);
-       loff_t length = readahead_length(rac);
+       size_t length = readahead_length(rac);
         struct iomap_readpage_ctx ctx = {
                 .rac    = rac,
         };
@@ -402,7 +402,7 @@ void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops)
         trace_iomap_readahead(inode, readahead_count(rac));
  
         while (length > 0) {
-               loff_t ret = iomap_apply(inode, pos, length, 0, ops,
+               ssize_t ret = iomap_apply(inode, pos, length, 0, ops,
                                 &ctx, iomap_readahead_actor);
                 if (ret <= 0) {
                         WARN_ON_ONCE(ret == 0);
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c

index 7b11283..89d4929 100644 (file)
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -211,11 +211,11 @@ failure:
   * If the skip factor is limited in this way then the file will use multiple
   * slots.
   */
-static inline int calculate_skip(int blocks)
+static inline int calculate_skip(u64 blocks)
  {
-       int skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
+       u64 skip = blocks / ((SQUASHFS_META_ENTRIES + 1)
                  * SQUASHFS_META_INDEXES);
-       return min(SQUASHFS_CACHED_BLKS - 1, skip + 1);
+       return min((u64) SQUASHFS_CACHED_BLKS - 1, skip + 1);
  }
  
  
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 1255823..f69c75b 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -676,11 +676,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
  extern void blk_set_pm_only(struct request_queue *q);
  extern void blk_clear_pm_only(struct request_queue *q);
  
-static inline bool blk_account_rq(struct request *rq)
-{
-       return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq);
-}
-
  #define list_entry_rq(ptr)     list_entry((ptr), struct request, queuelist)
  
  #define rq_data_dir(rq)                (op_is_write(req_op(rq)) ? WRITE : READ)
diff --git a/include/linux/console_struct.h b/include/linux/console_struct.h

index 1537348..d5b9c8d 100644 (file)
--- a/include/linux/console_struct.h
+++ b/include/linux/console_struct.h
@@ -101,6 +101,7 @@ struct vc_data {
         unsigned int    vc_rows;
         unsigned int    vc_size_row;            /* Bytes per row */
         unsigned int    vc_scan_lines;          /* # of scan lines */
+       unsigned int    vc_cell_height;         /* CRTC character cell height */
         unsigned long   vc_origin;              /* [!] Start of real screen */
         unsigned long   vc_scr_end;             /* [!] End of real screen */
         unsigned long   vc_visible_origin;      /* [!] Top of visible window */
diff --git a/include/linux/elevator.h b/include/linux/elevator.h

index 1fe8e10..dcb2f90 100644 (file)
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -34,7 +34,7 @@ struct elevator_mq_ops {
         void (*depth_updated)(struct blk_mq_hw_ctx *);
  
         bool (*allow_merge)(struct request_queue *, struct request *, struct bio *);
-       bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int);
+       bool (*bio_merge)(struct request_queue *, struct bio *, unsigned int);
         int (*request_merge)(struct request_queue *q, struct request **, struct bio *);
         void (*request_merged)(struct request_queue *, struct request *, enum elv_merge);
         void (*requests_merged)(struct request_queue *, struct request *, struct request *);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h

index 01f251b..89b69e6 100644 (file)
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -141,7 +141,6 @@ static inline void __iomem *devm_nvdimm_ioremap(struct device *dev,
  
  struct nvdimm_bus;
  struct module;
-struct device;
  struct nd_blk_region;
  struct nd_blk_region_desc {
         int (*enable)(struct nvdimm_bus *nvdimm_bus, struct device *dev);
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 322ec61..c274f75 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3216,5 +3216,37 @@ void mem_dump_obj(void *object);
  static inline void mem_dump_obj(void *object) {}
  #endif
  
+/**
+ * seal_check_future_write - Check for F_SEAL_FUTURE_WRITE flag and handle it
+ * @seals: the seals to check
+ * @vma: the vma to operate on
+ *
+ * Check whether F_SEAL_FUTURE_WRITE is set; if so, do proper check/handling on
+ * the vma flags.  Return 0 if the check passes, or <0 for errors.
+ */
+static inline int seal_check_future_write(int seals, struct vm_area_struct *vma)
+{
+       if (seals & F_SEAL_FUTURE_WRITE) {
+               /*
+                * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
+                * "future write" seal active.
+                */
+               if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
+                       return -EPERM;
+
+               /*
+                * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
+                * MAP_SHARED and read-only, take care to not allow mprotect to
+                * revert protections on such mappings. Do this only for shared
+                * mappings. For private mappings, don't need to mask
+                * VM_MAYWRITE as we still want them to be COW-writable.
+                */
+               if (vma->vm_flags & VM_SHARED)
+                       vma->vm_flags &= ~(VM_MAYWRITE);
+       }
+
+       return 0;
+}
+
  #endif /* __KERNEL__ */
  #endif /* _LINUX_MM_H */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h

index 6613b26..5aacc1c 100644 (file)
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -97,10 +97,10 @@ struct page {
                 };
                 struct {        /* page_pool used by netstack */
                         /**
-                        * @dma_addr: might require a 64-bit value even on
+                        * @dma_addr: might require a 64-bit value on
                          * 32-bit architectures.
                          */
-                       dma_addr_t dma_addr;
+                       unsigned long dma_addr[2];
                 };
                 struct {        /* slab, slob and slub */
                         union {
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h

index a4bd411..e89df44 100644 (file)
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -997,9 +997,9 @@ static inline loff_t readahead_pos(struct readahead_control *rac)
   * readahead_length - The number of bytes in this readahead request.
   * @rac: The readahead request.
   */
-static inline loff_t readahead_length(struct readahead_control *rac)
+static inline size_t readahead_length(struct readahead_control *rac)
  {
-       return (loff_t)rac->_nr_pages * PAGE_SIZE;
+       return rac->_nr_pages * PAGE_SIZE;
  }
  
  /**
@@ -1024,7 +1024,7 @@ static inline unsigned int readahead_count(struct readahead_control *rac)
   * readahead_batch_length - The number of bytes in the current batch.
   * @rac: The readahead request.
   */
-static inline loff_t readahead_batch_length(struct readahead_control *rac)
+static inline size_t readahead_batch_length(struct readahead_control *rac)
  {
         return rac->_batch_count * PAGE_SIZE;
  }
diff --git a/include/linux/pm.h b/include/linux/pm.h

index c965740..1d8209c 100644 (file)
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -601,6 +601,7 @@ struct dev_pm_info {
         unsigned int            idle_notification:1;
         unsigned int            request_pending:1;
         unsigned int            deferred_resume:1;
+       unsigned int            needs_force_resume:1;
         unsigned int            runtime_auto:1;
         bool                    ignore_children:1;
         unsigned int            no_callbacks:1;
diff --git a/include/linux/randomize_kstack.h b/include/linux/randomize_kstack.h

index fd80fab..bebc911 100644 (file)
--- a/include/linux/randomize_kstack.h
+++ b/include/linux/randomize_kstack.h
@@ -38,7 +38,7 @@ void *__builtin_alloca(size_t size);
                 u32 offset = raw_cpu_read(kstack_offset);               \
                 u8 *ptr = __builtin_alloca(KSTACK_OFFSET_MAX(offset));  \
                 /* Keep allocation even after "ptr" loses scope. */     \
-               asm volatile("" : "=o"(*ptr) :: "memory");              \
+               asm volatile("" :: "r"(ptr) : "memory");                \
         }                                                               \
  } while (0)
  
diff --git a/include/net/page_pool.h b/include/net/page_pool.h

index 6d517a3..b4b6de9 100644 (file)
--- a/include/net/page_pool.h
+++ b/include/net/page_pool.h
@@ -198,7 +198,17 @@ static inline void page_pool_recycle_direct(struct page_pool *pool,
  
  static inline dma_addr_t page_pool_get_dma_addr(struct page *page)
  {
-       return page->dma_addr;
+       dma_addr_t ret = page->dma_addr[0];
+       if (sizeof(dma_addr_t) > sizeof(unsigned long))
+               ret |= (dma_addr_t)page->dma_addr[1] << 16 << 16;
+       return ret;
+}
+
+static inline void page_pool_set_dma_addr(struct page *page, dma_addr_t addr)
+{
+       page->dma_addr[0] = addr;
+       if (sizeof(dma_addr_t) > sizeof(unsigned long))
+               page->dma_addr[1] = upper_32_bits(addr);
  }
  
  static inline bool is_page_pool_compiled_in(void)
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h

index f44eb0a..4c32e97 100644 (file)
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -185,7 +185,7 @@ struct fsxattr {
  #define BLKROTATIONAL _IO(0x12,126)
  #define BLKZEROOUT _IO(0x12,127)
  /*
- * A jump here: 130-131 are reserved for zoned block devices
+ * A jump here: 130-136 are reserved for zoned block devices
   * (see uapi/linux/blkzoned.h)
   */
  
diff --git a/include/xen/arm/swiotlb-xen.h b/include/xen/arm/swiotlb-xen.h

index 2994fe6..33336ab 100644 (file)
--- a/include/xen/arm/swiotlb-xen.h
+++ b/include/xen/arm/swiotlb-xen.h
@@ -2,6 +2,19 @@
  #ifndef _ASM_ARM_SWIOTLB_XEN_H
  #define _ASM_ARM_SWIOTLB_XEN_H
  
-extern int xen_swiotlb_detect(void);
+#include <xen/features.h>
+#include <xen/xen.h>
+
+static inline int xen_swiotlb_detect(void)
+{
+       if (!xen_domain())
+               return 0;
+       if (xen_feature(XENFEAT_direct_mapped))
+               return 1;
+       /* legacy case */
+       if (!xen_feature(XENFEAT_not_direct_mapped) && xen_initial_domain())
+               return 1;
+       return 0;
+}
  
  #endif /* _ASM_ARM_SWIOTLB_XEN_H */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c

index 76f0945..2997ca6 100644 (file)
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child)
         spin_unlock(&child->sighand->siglock);
  }
  
+static bool looks_like_a_spurious_pid(struct task_struct *task)
+{
+       if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+               return false;
+
+       if (task_pid_vnr(task) == task->ptrace_message)
+               return false;
+       /*
+        * The tracee changed its pid but the PTRACE_EVENT_EXEC event
+        * was not wait()'ed, most probably debugger targets the old
+        * leader which was destroyed in de_thread().
+        */
+       return true;
+}
+
  /* Ensure that nothing can wake it up, even SIGKILL */
  static bool ptrace_freeze_traced(struct task_struct *task)
  {
@@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task)
                 return ret;
  
         spin_lock_irq(&task->sighand->siglock);
-       if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+       if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+           !__fatal_signal_pending(task)) {
                 task->state = __TASK_TRACED;
                 ret = true;
         }
diff --git a/kernel/resource.c b/kernel/resource.c

index 028a5ab..ca9f519 100644 (file)
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1805,7 +1805,7 @@ static struct resource *__request_free_mem_region(struct device *dev,
                                 REGION_DISJOINT)
                         continue;
  
-               if (!__request_region_locked(res, &iomem_resource, addr, size,
+               if (__request_region_locked(res, &iomem_resource, addr, size,
                                                 name, 0))
                         break;
  
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c

index 20aa234..3248e24 100644 (file)
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6217,7 +6217,7 @@ static int select_idle_cpu(struct task_struct *p, struct sched_domain *sd, bool
         }
  
         if (has_idle_core)
-               set_idle_cores(this, false);
+               set_idle_cores(target, false);
  
         if (sched_feat(SIS_PROP) && !has_idle_core) {
                 time = cpu_clock(this) - time;
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c

index bea9d08..5897828 100644 (file)
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -92,7 +92,7 @@ static int alarmtimer_rtc_add_device(struct device *dev,
         if (rtcdev)
                 return -EBUSY;
  
-       if (!rtc->ops->set_alarm)
+       if (!test_bit(RTC_FEATURE_ALARM, rtc->features))
                 return -1;
         if (!device_may_wakeup(rtc->dev.parent))
                 return -1;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c

index 560e4c8..a21ef9c 100644 (file)
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3704,6 +3704,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                 goto print;
  
         while (*p) {
+               bool star = false;
+               int len = 0;
+
                 j = 0;
  
                 /* We only care about %s and variants */
@@ -3725,13 +3728,17 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                                 /* Need to test cases like %08.*s */
                                 for (j = 1; p[i+j]; j++) {
                                         if (isdigit(p[i+j]) ||
-                                           p[i+j] == '*' ||
                                             p[i+j] == '.')
                                                 continue;
+                                       if (p[i+j] == '*') {
+                                               star = true;
+                                               continue;
+                                       }
                                         break;
                                 }
                                 if (p[i+j] == 's')
                                         break;
+                               star = false;
                         }
                         j = 0;
                 }
@@ -3744,6 +3751,9 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                 iter->fmt[i] = '\0';
                 trace_seq_vprintf(&iter->seq, iter->fmt, ap);
  
+               if (star)
+                       len = va_arg(ap, int);
+
                 /* The ap now points to the string data of the %s */
                 str = va_arg(ap, const char *);
  
@@ -3762,8 +3772,18 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                         int ret;
  
                         /* Try to safely read the string */
-                       ret = strncpy_from_kernel_nofault(iter->fmt, str,
-                                                         iter->fmt_size);
+                       if (star) {
+                               if (len + 1 > iter->fmt_size)
+                                       len = iter->fmt_size - 1;
+                               if (len < 0)
+                                       len = 0;
+                               ret = copy_from_kernel_nofault(iter->fmt, str, len);
+                               iter->fmt[len] = 0;
+                               star = false;
+                       } else {
+                               ret = strncpy_from_kernel_nofault(iter->fmt, str,
+                                                                 iter->fmt_size);
+                       }
                         if (ret < 0)
                                 trace_seq_printf(&iter->seq, "(0x%px)", str);
                         else
@@ -3775,7 +3795,10 @@ void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
                         strncpy(iter->fmt, p + i, j + 1);
                         iter->fmt[j+1] = '\0';
                 }
-               trace_seq_printf(&iter->seq, iter->fmt, str);
+               if (star)
+                       trace_seq_printf(&iter->seq, iter->fmt, len, str);
+               else
+                       trace_seq_printf(&iter->seq, iter->fmt, str);
  
                 p += i + j + 1;
         }
diff --git a/lib/test_kasan.c b/lib/test_kasan.c

index dc05cfc..cacbbbd 100644 (file)
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -654,8 +654,20 @@ static char global_array[10];
  
  static void kasan_global_oob(struct kunit *test)
  {
-       volatile int i = 3;
-       char *p = &global_array[ARRAY_SIZE(global_array) + i];
+       /*
+        * Deliberate out-of-bounds access. To prevent CONFIG_UBSAN_LOCAL_BOUNDS
+        * from failing here and panicking the kernel, access the array via a
+        * volatile pointer, which will prevent the compiler from being able to
+        * determine the array bounds.
+        *
+        * This access uses a volatile pointer to char (char *volatile) rather
+        * than the more conventional pointer to volatile char (volatile char *)
+        * because we want to prevent the compiler from making inferences about
+        * the pointer itself (i.e. its array bounds), not the data that it
+        * refers to.
+        */
+       char *volatile array = global_array;
+       char *p = &array[ARRAY_SIZE(global_array) + 3];
  
         /* Only generic mode instruments globals. */
         KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -703,8 +715,9 @@ static void ksize_uaf(struct kunit *test)
  static void kasan_stack_oob(struct kunit *test)
  {
         char stack_array[10];
-       volatile int i = OOB_TAG_OFF;
-       char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
+       /* See comment in kasan_global_oob. */
+       char *volatile array = stack_array;
+       char *p = &array[ARRAY_SIZE(stack_array) + OOB_TAG_OFF];
  
         KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
  
@@ -715,7 +728,9 @@ static void kasan_alloca_oob_left(struct kunit *test)
  {
         volatile int i = 10;
         char alloca_array[i];
-       char *p = alloca_array - 1;
+       /* See comment in kasan_global_oob. */
+       char *volatile array = alloca_array;
+       char *p = array - 1;
  
         /* Only generic mode instruments dynamic allocas. */
         KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
@@ -728,7 +743,9 @@ static void kasan_alloca_oob_right(struct kunit *test)
  {
         volatile int i = 10;
         char alloca_array[i];
-       char *p = alloca_array + i;
+       /* See comment in kasan_global_oob. */
+       char *volatile array = alloca_array;
+       char *p = array + i;
  
         /* Only generic mode instruments dynamic allocas. */
         KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c

index 3db405d..95918f4 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -4056,6 +4056,7 @@ again:
                                  * See Documentation/vm/mmu_notifier.rst
                                  */
                                 huge_ptep_set_wrprotect(src, addr, src_pte);
+                               entry = huge_pte_wrprotect(entry);
                         }
  
                         page_dup_rmap(ptepage, true);
diff --git a/mm/ioremap.c b/mm/ioremap.c

index d1dcc7e..8ee0136 100644 (file)
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -16,16 +16,16 @@
  #include "pgalloc-track.h"
  
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
-static bool __ro_after_init iomap_max_page_shift = PAGE_SHIFT;
+static unsigned int __ro_after_init iomap_max_page_shift = BITS_PER_LONG - 1;
  
  static int __init set_nohugeiomap(char *str)
  {
-       iomap_max_page_shift = P4D_SHIFT;
+       iomap_max_page_shift = PAGE_SHIFT;
         return 0;
  }
  early_param("nohugeiomap", set_nohugeiomap);
  #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
-static const bool iomap_max_page_shift = PAGE_SHIFT;
+static const unsigned int iomap_max_page_shift = PAGE_SHIFT;
  #endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
  
  int ioremap_page_range(unsigned long addr,
diff --git a/mm/ksm.c b/mm/ksm.c

index 6bbe314..2f3aaeb 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -776,11 +776,12 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item)
                 struct page *page;
  
                 stable_node = rmap_item->head;
-               page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
+               page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
                 if (!page)
                         goto out;
  
                 hlist_del(&rmap_item->hlist);
+               unlock_page(page);
                 put_page(page);
  
                 if (!hlist_empty(&stable_node->hlist))
diff --git a/mm/shmem.c b/mm/shmem.c

index a08cede..5d46611 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2258,25 +2258,11 @@ out_nomem:
  static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
  {
         struct shmem_inode_info *info = SHMEM_I(file_inode(file));
+       int ret;
  
-       if (info->seals & F_SEAL_FUTURE_WRITE) {
-               /*
-                * New PROT_WRITE and MAP_SHARED mmaps are not allowed when
-                * "future write" seal active.
-                */
-               if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_WRITE))
-                       return -EPERM;
-
-               /*
-                * Since an F_SEAL_FUTURE_WRITE sealed memfd can be mapped as
-                * MAP_SHARED and read-only, take care to not allow mprotect to
-                * revert protections on such mappings. Do this only for shared
-                * mappings. For private mappings, don't need to mask
-                * VM_MAYWRITE as we still want them to be COW-writable.
-                */
-               if (vma->vm_flags & VM_SHARED)
-                       vma->vm_flags &= ~(VM_MAYWRITE);
-       }
+       ret = seal_check_future_write(info->seals, vma);
+       if (ret)
+               return ret;
  
         /* arm64 - allow memory tagging on RAM-based files */
         vma->vm_flags |= VM_MTE_ALLOWED;
@@ -2375,8 +2361,18 @@ static int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
         pgoff_t offset, max_off;
  
         ret = -ENOMEM;
-       if (!shmem_inode_acct_block(inode, 1))
+       if (!shmem_inode_acct_block(inode, 1)) {
+               /*
+                * We may have got a page, returned -ENOENT triggering a retry,
+                * and now we find ourselves with -ENOMEM. Release the page, to
+                * avoid a BUG_ON in our caller.
+                */
+               if (unlikely(*pagep)) {
+                       put_page(*pagep);
+                       *pagep = NULL;
+               }
                 goto out;
+       }
  
         if (!*pagep) {
                 page = shmem_alloc_page(gfp, info, pgoff);
diff --git a/mm/slab_common.c b/mm/slab_common.c

index f8833d3..a4a5714 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -318,6 +318,16 @@ kmem_cache_create_usercopy(const char *name,
         const char *cache_name;
         int err;
  
+#ifdef CONFIG_SLUB_DEBUG
+       /*
+        * If no slub_debug was enabled globally, the static key is not yet
+        * enabled by setup_slub_debug(). Enable it if the cache is being
+        * created with any of the debugging flags passed explicitly.
+        */
+       if (flags & SLAB_DEBUG_FLAGS)
+               static_branch_enable(&slub_debug_enabled);
+#endif
+
         mutex_lock(&slab_mutex);
  
         err = kmem_cache_sanity_check(name, size);
diff --git a/mm/slub.c b/mm/slub.c

index feda53a..438fa8d 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3828,15 +3828,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
  
  static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
  {
-#ifdef CONFIG_SLUB_DEBUG
-       /*
-        * If no slub_debug was enabled globally, the static key is not yet
-        * enabled by setup_slub_debug(). Enable it if the cache is being
-        * created with any of the debugging flags passed explicitly.
-        */
-       if (flags & SLAB_DEBUG_FLAGS)
-               static_branch_enable(&slub_debug_enabled);
-#endif
         s->flags = kmem_cache_flags(s->size, flags, s->name);
  #ifdef CONFIG_SLAB_FREELIST_HARDENED
         s->random = get_random_long();
diff --git a/net/core/page_pool.c b/net/core/page_pool.c

index 9ec1aa9..3c4c4c7 100644 (file)
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -174,8 +174,10 @@ static void page_pool_dma_sync_for_device(struct page_pool *pool,
                                           struct page *page,
                                           unsigned int dma_sync_size)
  {
+       dma_addr_t dma_addr = page_pool_get_dma_addr(page);
+
         dma_sync_size = min(dma_sync_size, pool->p.max_len);
-       dma_sync_single_range_for_device(pool->p.dev, page->dma_addr,
+       dma_sync_single_range_for_device(pool->p.dev, dma_addr,
                                          pool->p.offset, dma_sync_size,
                                          pool->p.dma_dir);
  }
@@ -195,7 +197,7 @@ static bool page_pool_dma_map(struct page_pool *pool, struct page *page)
         if (dma_mapping_error(pool->p.dev, dma))
                 return false;
  
-       page->dma_addr = dma;
+       page_pool_set_dma_addr(page, dma);
  
         if (pool->p.flags & PP_FLAG_DMA_SYNC_DEV)
                 page_pool_dma_sync_for_device(pool, page, pool->p.max_len);
@@ -331,13 +333,13 @@ void page_pool_release_page(struct page_pool *pool, struct page *page)
                  */
                 goto skip_dma_unmap;
  
-       dma = page->dma_addr;
+       dma = page_pool_get_dma_addr(page);
  
-       /* When page is unmapped, it cannot be returned our pool */
+       /* When page is unmapped, it cannot be returned to our pool */
         dma_unmap_page_attrs(pool->p.dev, dma,
                              PAGE_SIZE << pool->p.order, pool->p.dma_dir,
                              DMA_ATTR_SKIP_CPU_SYNC);
-       page->dma_addr = 0;
+       page_pool_set_dma_addr(page, 0);
  skip_dma_unmap:
         /* This may be the last page returned, releasing the pool, so
          * it is not safe to reference pool afterwards.
diff --git a/security/keys/trusted-keys/trusted_tpm1.c b/security/keys/trusted-keys/trusted_tpm1.c

index 4693945..aa108be 100644 (file)
--- a/security/keys/trusted-keys/trusted_tpm1.c
+++ b/security/keys/trusted-keys/trusted_tpm1.c
@@ -493,10 +493,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
  
         ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE);
         if (ret < 0)
-               return ret;
+               goto out;
  
-       if (ret != TPM_NONCE_SIZE)
-               return -EIO;
+       if (ret != TPM_NONCE_SIZE) {
+               ret = -EIO;
+               goto out;
+       }
  
         ordinal = htonl(TPM_ORD_SEAL);
         datsize = htonl(datalen);
diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c

index 617fabd..0165da3 100644 (file)
--- a/security/keys/trusted-keys/trusted_tpm2.c
+++ b/security/keys/trusted-keys/trusted_tpm2.c
@@ -336,9 +336,9 @@ out:
                         rc = -EPERM;
         }
         if (blob_len < 0)
-               return blob_len;
-
-       payload->blob_len = blob_len;
+               rc = blob_len;
+       else
+               payload->blob_len = blob_len;
  
         tpm_put_ops(chip);
         return rc;
diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h

index cc79856..4ba87de 100644 (file)
--- a/tools/arch/powerpc/include/uapi/asm/errno.h
+++ b/tools/arch/powerpc/include/uapi/asm/errno.h
@@ -2,6 +2,7 @@
  #ifndef _ASM_POWERPC_ERRNO_H
  #define _ASM_POWERPC_ERRNO_H
  
+#undef EDEADLOCK
  #include <asm-generic/errno.h>
  
  #undef EDEADLOCK
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h

index cc96e26..ac37830 100644 (file)
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -84,7 +84,7 @@
  
  /* CPU types for specific tunings: */
  #define X86_FEATURE_K8                 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7                 ( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7                    ( 3*32+ 5) "" Athlon */
  #define X86_FEATURE_P3                 ( 3*32+ 6) /* "" P3 */
  #define X86_FEATURE_P4                 ( 3*32+ 7) /* "" P4 */
  #define X86_FEATURE_CONSTANT_TSC       ( 3*32+ 8) /* TSC ticks at a constant rate */
@@ -236,6 +236,8 @@
  #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
  #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
  #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* "" PV vcpu_is_preempted function */
  
  /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
  #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
@@ -290,6 +292,8 @@
  #define X86_FEATURE_FENCE_SWAPGS_KERNEL        (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
  #define X86_FEATURE_SPLIT_LOCK_DETECT  (11*32+ 6) /* #AC for split lock */
  #define X86_FEATURE_PER_THREAD_MBA     (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1               (11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2               (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
  
  /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
  #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
@@ -336,6 +340,7 @@
  #define X86_FEATURE_AVIC               (15*32+13) /* Virtual Interrupt Controller */
  #define X86_FEATURE_V_VMSAVE_VMLOAD    (15*32+15) /* Virtual VMSAVE VMLOAD */
  #define X86_FEATURE_VGIF               (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_V_SPEC_CTRL                (15*32+20) /* Virtual SPEC_CTRL */
  #define X86_FEATURE_SVME_ADDR_CHK      (15*32+28) /* "" SVME addr check */
  
  /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
@@ -354,6 +359,7 @@
  #define X86_FEATURE_AVX512_VPOPCNTDQ   (16*32+14) /* POPCNT for vectors of DW/QW */
  #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
  #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT    (16*32+24) /* Bus Lock detect */
  #define X86_FEATURE_CLDEMOTE           (16*32+25) /* CLDEMOTE instruction */
  #define X86_FEATURE_MOVDIRI            (16*32+27) /* MOVDIRI instruction */
  #define X86_FEATURE_MOVDIR64B          (16*32+28) /* MOVDIR64B instruction */
@@ -374,6 +380,7 @@
  #define X86_FEATURE_MD_CLEAR           (18*32+10) /* VERW clears CPU buffers */
  #define X86_FEATURE_TSX_FORCE_ABORT    (18*32+13) /* "" TSX_FORCE_ABORT */
  #define X86_FEATURE_SERIALIZE          (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU         (18*32+15) /* "" This part has CPUs of more than one type */
  #define X86_FEATURE_TSXLDTRK           (18*32+16) /* TSX Suspend Load Address Tracking */
  #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
  #define X86_FEATURE_ARCH_LBR           (18*32+19) /* Intel ARCH LBR */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h

index 4502935..211ba33 100644 (file)
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -185,6 +185,9 @@
  #define MSR_PEBS_DATA_CFG              0x000003f2
  #define MSR_IA32_DS_AREA               0x00000600
  #define MSR_IA32_PERF_CAPABILITIES     0x00000345
+#define PERF_CAP_METRICS_IDX           15
+#define PERF_CAP_PT_IDX                        16
+
  #define MSR_PEBS_LD_LAT_THRESHOLD      0x000003f6
  
  #define MSR_IA32_RTIT_CTL              0x00000570
@@ -265,6 +268,7 @@
  #define DEBUGCTLMSR_LBR                        (1UL <<  0) /* last branch recording */
  #define DEBUGCTLMSR_BTF_SHIFT          1
  #define DEBUGCTLMSR_BTF                        (1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT    (1UL <<  2)
  #define DEBUGCTLMSR_TR                 (1UL <<  6)
  #define DEBUGCTLMSR_BTS                        (1UL <<  7)
  #define DEBUGCTLMSR_BTINT              (1UL <<  8)
@@ -533,9 +537,9 @@
  /* K8 MSRs */
  #define MSR_K8_TOP_MEM1                        0xc001001a
  #define MSR_K8_TOP_MEM2                        0xc001001d
-#define MSR_K8_SYSCFG                  0xc0010010
-#define MSR_K8_SYSCFG_MEM_ENCRYPT_BIT  23
-#define MSR_K8_SYSCFG_MEM_ENCRYPT      BIT_ULL(MSR_K8_SYSCFG_MEM_ENCRYPT_BIT)
+#define MSR_AMD64_SYSCFG               0xc0010010
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT       23
+#define MSR_AMD64_SYSCFG_MEM_ENCRYPT   BIT_ULL(MSR_AMD64_SYSCFG_MEM_ENCRYPT_BIT)
  #define MSR_K8_INT_PENDING_MSG         0xc0010055
  /* C1E active bits in int pending message */
  #define K8_INTP_C1E_ACTIVE_MASK                0x18000000
diff --git a/tools/arch/x86/include/uapi/asm/vmx.h b/tools/arch/x86/include/uapi/asm/vmx.h

index b8e650a..946d761 100644 (file)
--- a/tools/arch/x86/include/uapi/asm/vmx.h
+++ b/tools/arch/x86/include/uapi/asm/vmx.h
@@ -27,6 +27,7 @@
  
  
  #define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE      0x08000000
  
  #define EXIT_REASON_EXCEPTION_NMI       0
  #define EXIT_REASON_EXTERNAL_INTERRUPT  1
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S

index 1e299ac..1cc9da6 100644 (file)
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,7 @@
  #include <linux/linkage.h>
  #include <asm/errno.h>
  #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
  #include <asm/export.h>
  
  .pushsection .noinstr.text, "ax"
diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S

index 0bfd26e..9827ae2 100644 (file)
--- a/tools/arch/x86/lib/memset_64.S
+++ b/tools/arch/x86/lib/memset_64.S
@@ -3,7 +3,7 @@
  
  #include <linux/linkage.h>
  #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
  #include <asm/export.h>
  
  /*
diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h

deleted file mode 100644 (file)

index b54bd86..0000000
--- a/tools/include/asm/alternative-asm.h
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
-#define _TOOLS_ASM_ALTERNATIVE_ASM_H
-
-/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
-
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
-
-#endif
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h

new file mode 100644 (file)

index 0000000..b54bd86
--- /dev/null
+++ b/tools/include/asm/alternative.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
+#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+#define ALTERNATIVE_2 #
+
+#endif
diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h

index ce58cff..6de5a7f 100644 (file)
--- a/tools/include/uapi/asm-generic/unistd.h
+++ b/tools/include/uapi/asm-generic/unistd.h
@@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
  __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
  #define __NR_mount_setattr 442
  __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
+#define __NR_quotactl_path 443
+__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+
+#define __NR_landlock_create_ruleset 444
+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
+#define __NR_landlock_add_rule 445
+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+#define __NR_landlock_restrict_self 446
+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
  
  #undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 447
  
  /*
   * 32 bit systems traditionally used different
diff --git a/tools/include/uapi/drm/drm.h b/tools/include/uapi/drm/drm.h

index 0827037..67b94bc 100644 (file)
--- a/tools/include/uapi/drm/drm.h
+++ b/tools/include/uapi/drm/drm.h
@@ -625,30 +625,147 @@ struct drm_gem_open {
         __u64 size;
  };
  
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
  #define DRM_CAP_DUMB_BUFFER            0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a CRTC index in the high bits of
+ * &drm_wait_vblank_request.type.
+ *
+ * Starting kernel version 2.6.39, this capability is always set to 1.
+ */
  #define DRM_CAP_VBLANK_HIGH_CRTC       0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
  #define DRM_CAP_DUMB_PREFERRED_DEPTH   0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers, it's irrelevant for
+ * other types of buffers.
+ */
  #define DRM_CAP_DUMB_PREFER_SHADOW     0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors. See
+ * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ */
  #define DRM_CAP_PRIME                  0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ */
  #define  DRM_PRIME_CAP_IMPORT          0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ */
  #define  DRM_PRIME_CAP_EXPORT          0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting kernel version 4.15, this capability is always set to 1.
+ */
  #define DRM_CAP_TIMESTAMP_MONOTONIC    0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ */
  #define DRM_CAP_ASYNC_PAGE_FLIP                0x7
-/*
- * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight
- * combination for the hardware cursor. The intention is that a hardware
- * agnostic userspace can query a cursor plane size to use.
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware agnostic userspace can query a cursor plane size to use.
   *
   * Note that the cross-driver contract is to merely return a valid size;
   * drivers are free to attach another meaning on top, eg. i915 returns the
   * maximum plane size.
   */
  #define DRM_CAP_CURSOR_WIDTH           0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
  #define DRM_CAP_CURSOR_HEIGHT          0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
  #define DRM_CAP_ADDFB2_MODIFIERS       0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
  #define DRM_CAP_PAGE_FLIP_TARGET       0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting kernel version 4.12, this capability is always set to 1.
+ */
  #define DRM_CAP_CRTC_IN_VBLANK_EVENT   0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
  #define DRM_CAP_SYNCOBJ                0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
  #define DRM_CAP_SYNCOBJ_TIMELINE       0x14
  
  /* DRM_IOCTL_GET_CAP ioctl argument type */
diff --git a/tools/include/uapi/drm/i915_drm.h b/tools/include/uapi/drm/i915_drm.h

index 1987e2e..ddc47bb 100644 (file)
--- a/tools/include/uapi/drm/i915_drm.h
+++ b/tools/include/uapi/drm/i915_drm.h
@@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object {
         __u64 offset;
  };
  
+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
  struct drm_i915_gem_execbuffer {
         /**
          * List of buffers to be validated with their relocations to be
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h

index f6afee2..3fd9a7e 100644 (file)
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt {
  #define KVM_CAP_DIRTY_LOG_RING 192
  #define KVM_CAP_X86_BUS_LOCK_EXIT 193
  #define KVM_CAP_PPC_DAWR1 194
+#define KVM_CAP_SET_GUEST_DEBUG2 195
+#define KVM_CAP_SGX_ATTRIBUTE 196
+#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
+#define KVM_CAP_PTP_KVM 198
  
  #ifdef KVM_CAP_IRQ_ROUTING
  
@@ -1671,6 +1675,8 @@ enum sev_cmd_id {
         KVM_SEV_CERT_EXPORT,
         /* Attestation report */
         KVM_SEV_GET_ATTESTATION_REPORT,
+       /* Guest Migration Extension */
+       KVM_SEV_SEND_CANCEL,
  
         KVM_SEV_NR_MAX,
  };
@@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report {
         __u32 len;
  };
  
+struct kvm_sev_send_start {
+       __u32 policy;
+       __u64 pdh_cert_uaddr;
+       __u32 pdh_cert_len;
+       __u64 plat_certs_uaddr;
+       __u32 plat_certs_len;
+       __u64 amd_certs_uaddr;
+       __u32 amd_certs_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 pdh_uaddr;
+       __u32 pdh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
  #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
  #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
  #define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h

index 14332f4..bf81435 100644 (file)
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -127,6 +127,7 @@ enum perf_sw_ids {
         PERF_COUNT_SW_EMULATION_FAULTS          = 8,
         PERF_COUNT_SW_DUMMY                     = 9,
         PERF_COUNT_SW_BPF_OUTPUT                = 10,
+       PERF_COUNT_SW_CGROUP_SWITCHES           = 11,
  
         PERF_COUNT_SW_MAX,                      /* non-ABI */
  };
@@ -326,6 +327,7 @@ enum perf_event_read_format {
  #define PERF_ATTR_SIZE_VER4    104     /* add: sample_regs_intr */
  #define PERF_ATTR_SIZE_VER5    112     /* add: aux_watermark */
  #define PERF_ATTR_SIZE_VER6    120     /* add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7    128     /* add: sig_data */
  
  /*
   * Hardware event_id to monitor via a performance monitoring event:
@@ -404,7 +406,10 @@ struct perf_event_attr {
                                 cgroup         :  1, /* include cgroup events */
                                 text_poke      :  1, /* include text poke events */
                                 build_id       :  1, /* use build id in mmap2 events */
-                               __reserved_1   : 29;
+                               inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
+                               remove_on_exec :  1, /* event is removed from task on exec */
+                               sigtrap        :  1, /* send synchronous SIGTRAP on event */
+                               __reserved_1   : 26;
  
         union {
                 __u32           wakeup_events;    /* wakeup every n events */
@@ -456,6 +461,12 @@ struct perf_event_attr {
         __u16   __reserved_2;
         __u32   aux_sample_size;
         __u32   __reserved_3;
+
+       /*
+        * User provided data if sigtrap=1, passed back to user via
+        * siginfo_t::si_perf, e.g. to permit user to identify the event.
+        */
+       __u64   sig_data;
  };
  
  /*
@@ -1171,10 +1182,15 @@ enum perf_callchain_context {
  /**
   * PERF_RECORD_AUX::flags bits
   */
-#define PERF_AUX_FLAG_TRUNCATED                0x01    /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE                0x02    /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL          0x04    /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION                0x08    /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED                        0x01    /* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE                        0x02    /* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL                  0x04    /* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION                        0x08    /* sample collided with another */
+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK     0xff00  /* PMU specific trace format type */
+
+/* CoreSight PMU AUX buffer formats */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT       0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW             0x0100 /* Raw format of the source */
  
  #define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
  #define PERF_FLAG_FD_OUTPUT            (1UL << 1)
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h

index 667f1ae..18a9f59 100644 (file)
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -255,4 +255,8 @@ struct prctl_mm_map {
  # define SYSCALL_DISPATCH_FILTER_ALLOW 0
  # define SYSCALL_DISPATCH_FILTER_BLOCK 1
  
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS                60
+#define PR_PAC_GET_ENABLED_KEYS                61
+
  #endif /* _LINUX_PRCTL_H */
diff --git a/tools/kvm/kvm_stat/kvm_stat.txt b/tools/kvm/kvm_stat/kvm_stat.txt

index feaf464..3a9f203 100644 (file)
--- a/tools/kvm/kvm_stat/kvm_stat.txt
+++ b/tools/kvm/kvm_stat/kvm_stat.txt
@@ -111,7 +111,7 @@ OPTIONS
  --tracepoints::
          retrieve statistics from tracepoints
  
-*z*::
+-z::
  --skip-zero-records::
          omit records with all zeros in logging mode
  
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c

index cedf3ed..24295d3 100644 (file)
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -19,6 +19,7 @@
  #include <objtool/elf.h>
  #include <objtool/arch.h>
  #include <objtool/warn.h>
+#include <objtool/endianness.h>
  #include <arch/elf.h>
  
  static int is_x86_64(const struct elf *elf)
@@ -725,7 +726,7 @@ static int elf_add_alternative(struct elf *elf,
                 return -1;
         }
  
-       alt->cpuid = cpuid;
+       alt->cpuid = bswap_if_needed(cpuid);
         alt->instrlen = orig_len;
         alt->replacementlen = repl_len;
  
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c

index d08f5f3..743c2e9 100644 (file)
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -762,6 +762,7 @@ struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
         data->d_buf = &sym->sym;
         data->d_size = sizeof(sym->sym);
         data->d_align = 1;
+       data->d_type = ELF_T_SYM;
  
         sym->idx = symtab->len / sizeof(sym->sym);
  
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config

index 0d66190..406a951 100644 (file)
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -540,6 +540,7 @@ ifndef NO_LIBELF
        ifdef LIBBPF_DYNAMIC
          ifeq ($(feature-libbpf), 1)
            EXTLIBS += -lbpf
+          $(call detected,CONFIG_LIBBPF_DYNAMIC)
          else
            dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
          endif
diff --git a/tools/perf/arch/arm64/util/kvm-stat.c b/tools/perf/arch/arm64/util/kvm-stat.c

index 2303256..73d18e0 100644 (file)
--- a/tools/perf/arch/arm64/util/kvm-stat.c
+++ b/tools/perf/arch/arm64/util/kvm-stat.c
@@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
                 .name   = "vmexit",
                 .ops    = &exit_events,
         },
-       { NULL },
+       { NULL, NULL },
  };
  
  const char * const kvm_skip_events[] = {
diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl

index 9164969..9974f5f 100644 (file)
--- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
+++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
@@ -356,3 +356,8 @@
  439    n64     faccessat2                      sys_faccessat2
  440    n64     process_madvise                 sys_process_madvise
  441    n64     epoll_pwait2                    sys_epoll_pwait2
+442    n64     mount_setattr                   sys_mount_setattr
+443    n64     quotactl_path                   sys_quotactl_path
+444    n64     landlock_create_ruleset         sys_landlock_create_ruleset
+445    n64     landlock_add_rule               sys_landlock_add_rule
+446    n64     landlock_restrict_self          sys_landlock_restrict_self
diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl

index 0b2480c..2e68fbb 100644 (file)
--- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
@@ -522,3 +522,7 @@
  440    common  process_madvise                 sys_process_madvise
  441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
  442    common  mount_setattr                   sys_mount_setattr
+443    common  quotactl_path                   sys_quotactl_path
+444    common  landlock_create_ruleset         sys_landlock_create_ruleset
+445    common  landlock_add_rule               sys_landlock_add_rule
+446    common  landlock_restrict_self          sys_landlock_restrict_self
diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl

index 3abef21..7e4a2ab 100644 (file)
--- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl
+++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl
@@ -445,3 +445,7 @@
  440  common    process_madvise         sys_process_madvise             sys_process_madvise
  441  common    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
  442  common    mount_setattr           sys_mount_setattr               sys_mount_setattr
+443  common    quotactl_path           sys_quotactl_path               sys_quotactl_path
+444  common    landlock_create_ruleset sys_landlock_create_ruleset     sys_landlock_create_ruleset
+445  common    landlock_add_rule       sys_landlock_add_rule           sys_landlock_add_rule
+446  common    landlock_restrict_self  sys_landlock_restrict_self      sys_landlock_restrict_self
diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl

index 7bf01cb..ecd551b 100644 (file)
--- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
@@ -364,6 +364,10 @@
  440    common  process_madvise         sys_process_madvise
  441    common  epoll_pwait2            sys_epoll_pwait2
  442    common  mount_setattr           sys_mount_setattr
+443    common  quotactl_path           sys_quotactl_path
+444    common  landlock_create_ruleset sys_landlock_create_ruleset
+445    common  landlock_add_rule       sys_landlock_add_rule
+446    common  landlock_restrict_self  sys_landlock_restrict_self
  
  #
  # Due to a historical design error, certain syscalls are numbered differently
diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c

index ed4f0bd..7422b0e 100644 (file)
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb,
                         mapfile = strdup(fpath);
                         return 0;
                 }
-
-               pr_info("%s: Ignoring file %s\n", prog, fpath);
+               if (is_json_file(bname))
+                       pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath);
+               else
+                       pr_info("%s: Ignoring file %s\n", prog, fpath);
                 return 0;
         }
  
diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record

index 645009c..4a7b8de 100644 (file)
--- a/tools/perf/tests/attr/base-record
+++ b/tools/perf/tests/attr/base-record
@@ -5,7 +5,7 @@ group_fd=-1
  flags=0|8
  cpu=*
  type=0|1
-size=120
+size=128
  config=0
  sample_period=*
  sample_type=263
diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat

index b0f42c3..4081644 100644 (file)
--- a/tools/perf/tests/attr/base-stat
+++ b/tools/perf/tests/attr/base-stat
@@ -5,7 +5,7 @@ group_fd=-1
  flags=0|8
  cpu=*
  type=0
-size=120
+size=128
  config=0
  sample_period=0
  sample_type=65536
diff --git a/tools/perf/tests/attr/system-wide-dummy b/tools/perf/tests/attr/system-wide-dummy

index eba723c..86a15dd 100644 (file)
--- a/tools/perf/tests/attr/system-wide-dummy
+++ b/tools/perf/tests/attr/system-wide-dummy
@@ -7,7 +7,7 @@ cpu=*
  pid=-1
  flags=8
  type=1
-size=120
+size=128
  config=9
  sample_period=4000
  sample_type=455
diff --git a/tools/perf/util/Build b/tools/perf/util/Build

index 8c0d9f3..b64bdc1 100644 (file)
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
  perf-$(CONFIG_LIBELF) += probe-file.o
  perf-$(CONFIG_LIBELF) += probe-event.o
  
+ifdef CONFIG_LIBBPF_DYNAMIC
+  hashmap := 1
+endif
  ifndef CONFIG_LIBBPF
+  hashmap := 1
+endif
+
+ifdef hashmap
  perf-y += hashmap.o
  endif
  
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c

index f99852d..43e5b56 100644 (file)
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate)
  static int record_opts__config_freq(struct record_opts *opts)
  {
         bool user_freq = opts->user_freq != UINT_MAX;
+       bool user_interval = opts->user_interval != ULLONG_MAX;
         unsigned int max_rate;
  
-       if (opts->user_interval != ULLONG_MAX)
+       if (user_interval && user_freq) {
+               pr_err("cannot set frequency and period at the same time\n");
+               return -1;
+       }
+
+       if (user_interval)
                 opts->default_interval = opts->user_interval;
         if (user_freq)
                 opts->freq = opts->user_freq;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c

index a12cf4f..106b3d6 100644 (file)
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event,
         struct perf_record_record_cpu_map *mask;
         unsigned i;
  
-       data->type = bswap_64(data->type);
+       data->type = bswap_16(data->type);
  
         switch (data->type) {
         case PERF_CPU_MAP__CPUS:
@@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event,
  {
         u64 size;
  
-       size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+       size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
         size += 1; /* nr item itself */
         mem_bswap_64(&event->stat_config.nr, size);
  }
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c

index c62d372..ed563bd 100644 (file)
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -62,7 +62,7 @@ struct nfit_test_resource *get_nfit_res(resource_size_t resource)
  }
  EXPORT_SYMBOL(get_nfit_res);
  
-void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
+static void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size,
                 void __iomem *(*fallback_fn)(resource_size_t, unsigned long))
  {
         struct nfit_test_resource *nfit_res = get_nfit_res(offset);
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c

index 9b185bf..54f367c 100644 (file)
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1871,9 +1871,16 @@ static void smart_init(struct nfit_test *t)
         }
  }
  
+static size_t sizeof_spa(struct acpi_nfit_system_address *spa)
+{
+       /* until spa location cookie support is added... */
+       return sizeof(*spa) - 8;
+}
+
  static int nfit_test0_alloc(struct nfit_test *t)
  {
-       size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+       struct acpi_nfit_system_address *spa = NULL;
+       size_t nfit_size = sizeof_spa(spa) * NUM_SPA
                         + sizeof(struct acpi_nfit_memory_map) * NUM_MEM
                         + sizeof(struct acpi_nfit_control_region) * NUM_DCR
                         + offsetof(struct acpi_nfit_control_region,
@@ -1937,7 +1944,8 @@ static int nfit_test0_alloc(struct nfit_test *t)
  
  static int nfit_test1_alloc(struct nfit_test *t)
  {
-       size_t nfit_size = sizeof(struct acpi_nfit_system_address) * 2
+       struct acpi_nfit_system_address *spa = NULL;
+       size_t nfit_size = sizeof_spa(spa) * 2
                 + sizeof(struct acpi_nfit_memory_map) * 2
                 + offsetof(struct acpi_nfit_control_region, window_size) * 2;
         int i;
@@ -2000,7 +2008,7 @@ static void nfit_test0_setup(struct nfit_test *t)
          */
         spa = nfit_buf;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
         spa->range_index = 0+1;
         spa->address = t->spa_set_dma[0];
@@ -2014,7 +2022,7 @@ static void nfit_test0_setup(struct nfit_test *t)
          */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
         spa->range_index = 1+1;
         spa->address = t->spa_set_dma[1];
@@ -2024,7 +2032,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa2 (dcr0) dimm0 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
         spa->range_index = 2+1;
         spa->address = t->dcr_dma[0];
@@ -2034,7 +2042,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa3 (dcr1) dimm1 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
         spa->range_index = 3+1;
         spa->address = t->dcr_dma[1];
@@ -2044,7 +2052,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa4 (dcr2) dimm2 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
         spa->range_index = 4+1;
         spa->address = t->dcr_dma[2];
@@ -2054,7 +2062,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa5 (dcr3) dimm3 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
         spa->range_index = 5+1;
         spa->address = t->dcr_dma[3];
@@ -2064,7 +2072,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa6 (bdw for dcr0) dimm0 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
         spa->range_index = 6+1;
         spa->address = t->dimm_dma[0];
@@ -2074,7 +2082,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa7 (bdw for dcr1) dimm1 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
         spa->range_index = 7+1;
         spa->address = t->dimm_dma[1];
@@ -2084,7 +2092,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa8 (bdw for dcr2) dimm2 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
         spa->range_index = 8+1;
         spa->address = t->dimm_dma[2];
@@ -2094,7 +2102,7 @@ static void nfit_test0_setup(struct nfit_test *t)
         /* spa9 (bdw for dcr3) dimm3 */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
         spa->range_index = 9+1;
         spa->address = t->dimm_dma[3];
@@ -2581,7 +2589,7 @@ static void nfit_test0_setup(struct nfit_test *t)
                 /* spa10 (dcr4) dimm4 */
                 spa = nfit_buf + offset;
                 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-               spa->header.length = sizeof(*spa);
+               spa->header.length = sizeof_spa(spa);
                 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
                 spa->range_index = 10+1;
                 spa->address = t->dcr_dma[4];
@@ -2595,7 +2603,7 @@ static void nfit_test0_setup(struct nfit_test *t)
                  */
                 spa = nfit_buf + offset;
                 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-               spa->header.length = sizeof(*spa);
+               spa->header.length = sizeof_spa(spa);
                 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
                 spa->range_index = 11+1;
                 spa->address = t->spa_set_dma[2];
@@ -2605,7 +2613,7 @@ static void nfit_test0_setup(struct nfit_test *t)
                 /* spa12 (bdw for dcr4) dimm4 */
                 spa = nfit_buf + offset;
                 spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-               spa->header.length = sizeof(*spa);
+               spa->header.length = sizeof_spa(spa);
                 memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
                 spa->range_index = 12+1;
                 spa->address = t->dimm_dma[4];
@@ -2739,7 +2747,7 @@ static void nfit_test1_setup(struct nfit_test *t)
         /* spa0 (flat range with no bdw aliasing) */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
         spa->range_index = 0+1;
         spa->address = t->spa_set_dma[0];
@@ -2749,7 +2757,7 @@ static void nfit_test1_setup(struct nfit_test *t)
         /* virtual cd region */
         spa = nfit_buf + offset;
         spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
-       spa->header.length = sizeof(*spa);
+       spa->header.length = sizeof_spa(spa);
         memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
         spa->range_index = 0;
         spa->address = t->spa_set_dma[1];
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c

index 656b049..67b77ab 100644 (file)
--- a/tools/testing/selftests/arm64/bti/test.c
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -6,6 +6,7 @@
  
  #include "system.h"
  
+#include <stddef.h>
  #include <linux/errno.h>
  #include <linux/auxvec.h>
  #include <linux/signal.h>
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S

index aaf7bc7..7629819 100644 (file)
--- a/tools/testing/selftests/kvm/lib/x86_64/handlers.S
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -54,9 +54,9 @@ idt_handlers:
         .align 8
  
         /* Fetch current address and append it to idt_handlers. */
-       current_handler = .
+666 :
  .pushsection .rodata
-.quad current_handler
+       .quad 666b
  .popsection
  
         .if ! \has_error
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c

index ca22ee6..63096ce 100644 (file)
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
@@ -18,6 +18,28 @@
  #include "vmx.h"
  
  #define VCPU_ID                5
+#define NMI_VECTOR     2
+
+static int ud_count;
+
+void enable_x2apic(void)
+{
+       uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+       wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       ud_count++;
+       regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
  
  void l2_guest_code(void)
  {
@@ -25,15 +47,23 @@ void l2_guest_code(void)
  
         GUEST_SYNC(8);
  
+       /* Forced exit to L1 upon restore */
+       GUEST_SYNC(9);
+
         /* Done, exit to L1 and never come back.  */
         vmcall();
  }
  
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
  {
  #define L2_GUEST_STACK_SIZE 64
         unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
  
+       enable_x2apic();
+
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+
         enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
  
         GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -55,27 +85,40 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
         current_evmcs->revision_id = EVMCS_VERSION;
         GUEST_SYNC(6);
  
+       current_evmcs->pin_based_vm_exec_control |=
+               PIN_BASED_NMI_EXITING;
         GUEST_ASSERT(!vmlaunch());
         GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-       GUEST_SYNC(9);
+
+       /*
+        * NMI forces L2->L1 exit; resume L2 and hope that EVMCS is
+        * up-to-date (RIP points where it should and not at the beginning
+        * of l2_guest_code()). GUEST_SYNC(9) checks that.
+        */
         GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
         GUEST_SYNC(10);
+
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_SYNC(11);
+
+       /* Try enlightened vmptrld with an incorrect GPA */
+       evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(ud_count == 1);
+       GUEST_DONE();
  }
  
-void guest_code(struct vmx_pages *vmx_pages)
+void inject_nmi(struct kvm_vm *vm)
  {
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
+       struct kvm_vcpu_events events;
  
-       if (vmx_pages)
-               l1_guest_code(vmx_pages);
+       vcpu_events_get(vm, VCPU_ID, &events);
  
-       GUEST_DONE();
+       events.nmi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
  
-       /* Try enlightened vmptrld with an incorrect GPA */
-       evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
-       GUEST_ASSERT(vmlaunch());
+       vcpu_events_set(vm, VCPU_ID, &events);
  }
  
  int main(int argc, char *argv[])
@@ -109,6 +152,13 @@ int main(int argc, char *argv[])
         vcpu_alloc_vmx(vm, &vmx_pages_gva);
         vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
  
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+       vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+
+       pr_info("Running L1 which uses EVMCS to run L2\n");
+
         for (stage = 1;; stage++) {
                 _vcpu_run(vm, VCPU_ID);
                 TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -124,7 +174,7 @@ int main(int argc, char *argv[])
                 case UCALL_SYNC:
                         break;
                 case UCALL_DONE:
-                       goto part1_done;
+                       goto done;
                 default:
                         TEST_FAIL("Unknown ucall %lu", uc.cmd);
                 }
@@ -154,12 +204,14 @@ int main(int argc, char *argv[])
                 TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
                             "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
                             (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
  
-part1_done:
-       _vcpu_run(vm, VCPU_ID);
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-                   "Unexpected successful VMEnter with invalid eVMCS pointer!");
+               /* Force immediate L2->L1 exit before resuming */
+               if (stage == 8) {
+                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+                       inject_nmi(vm);
+               }
+       }
  
+done:
         kvm_vm_free(vm);
  }
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c

index 2799c66..6b4feb9 100644 (file)
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
         if (val < grow_start)
                 val = grow_start;
  
-       if (val > halt_poll_ns)
-               val = halt_poll_ns;
+       if (val > vcpu->kvm->max_halt_poll_ns)
+               val = vcpu->kvm->max_halt_poll_ns;
  
         vcpu->halt_poll_ns = val;
  out:
@@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                 goto out;
                         }
                         poll_end = cur = ktime_get();
-               } while (single_task_running() && ktime_before(cur, stop));
+               } while (single_task_running() && !need_resched() &&
+                        ktime_before(cur, stop));
         }
  
         prepare_to_rcuwait(&vcpu->wait);
author	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 16 May 2021 16:55:05 +0000 (09:55 -0700)
.mailmap		patch \| blob \| history
Documentation/ABI/obsolete/sysfs-class-dax		patch \| blob \| history
Documentation/ABI/obsolete/sysfs-kernel-fadump_registered		patch \| blob \| history
Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem		patch \| blob \| history
Documentation/ABI/removed/sysfs-bus-nfit		patch \| blob \| history
Documentation/ABI/testing/sysfs-bus-nfit		patch \| blob \| history
Documentation/ABI/testing/sysfs-bus-papr-pmem		patch \| blob \| history
Documentation/ABI/testing/sysfs-module		patch \| blob \| history
Documentation/admin-guide/sysctl/kernel.rst		patch \| blob \| history
Documentation/block/data-integrity.rst		patch \| blob \| history
Documentation/cdrom/cdrom-standard.rst		patch \| blob \| history
Documentation/driver-api/nvdimm/nvdimm.rst		patch \| blob \| history
Documentation/driver-api/serial/index.rst		patch \| blob \| history
Documentation/filesystems/erofs.rst		patch \| blob \| history
Documentation/hwmon/tmp103.rst		patch \| blob \| history
Documentation/networking/device_drivers/ethernet/intel/i40e.rst		patch \| blob \| history
Documentation/networking/device_drivers/ethernet/intel/iavf.rst		patch \| blob \| history
Documentation/process/kernel-enforcement-statement.rst		patch \| blob \| history
Documentation/security/tpm/xen-tpmfront.rst		patch \| blob \| history
Documentation/timers/no_hz.rst		patch \| blob \| history
Documentation/translations/zh_CN/SecurityBugs	[deleted file]	patch \| blob \| history
Documentation/usb/mtouchusb.rst		patch \| blob \| history
Documentation/usb/usb-serial.rst		patch \| blob \| history
Documentation/virt/kvm/amd-memory-encryption.rst		patch \| blob \| history
Documentation/virt/kvm/api.rst		patch \| blob \| history
Documentation/x86/amd-memory-encryption.rst		patch \| blob \| history
MAINTAINERS		patch \| blob \| history
arch/arc/Makefile		patch \| blob \| history
arch/arc/include/asm/cmpxchg.h		patch \| blob \| history
arch/arc/include/asm/page.h		patch \| blob \| history
arch/arc/include/asm/pgtable.h		patch \| blob \| history
arch/arc/include/uapi/asm/page.h		patch \| blob \| history
arch/arc/kernel/entry.S		patch \| blob \| history
arch/arc/kernel/kgdb.c		patch \| blob \| history
arch/arc/kernel/process.c		patch \| blob \| history
arch/arc/kernel/signal.c		patch \| blob \| history
arch/arc/mm/init.c		patch \| blob \| history
arch/arc/mm/ioremap.c		patch \| blob \| history
arch/arc/mm/tlb.c		patch \| blob \| history
arch/arm/xen/mm.c		patch \| blob \| history
arch/arm64/Makefile		patch \| blob \| history
arch/arm64/include/asm/Kbuild		patch \| blob \| history
arch/arm64/include/asm/cpucaps.h	[deleted file]	patch \| blob \| history
arch/arm64/mm/flush.c		patch \| blob \| history
arch/arm64/mm/init.c		patch \| blob \| history
arch/arm64/mm/proc.S		patch \| blob \| history
arch/arm64/tools/Makefile	[new file with mode: 0644]	patch \| blob
arch/arm64/tools/cpucaps	[new file with mode: 0644]	patch \| blob
arch/arm64/tools/gen-cpucaps.awk	[new file with mode: 0755]	patch \| blob
arch/powerpc/include/asm/hvcall.h		patch \| blob \| history
arch/powerpc/include/asm/interrupt.h		patch \| blob \| history
arch/powerpc/include/asm/paravirt.h		patch \| blob \| history
arch/powerpc/include/asm/plpar_wrappers.h		patch \| blob \| history
arch/powerpc/include/asm/uaccess.h		patch \| blob \| history
arch/powerpc/kernel/exceptions-64e.S		patch \| blob \| history
arch/powerpc/kernel/interrupt.c		patch \| blob \| history
arch/powerpc/kernel/legacy_serial.c		patch \| blob \| history
arch/powerpc/kernel/signal.h		patch \| blob \| history
arch/powerpc/kvm/book3s_64_mmu_hv.c		patch \| blob \| history
arch/powerpc/lib/feature-fixups.c		patch \| blob \| history
arch/powerpc/platforms/pseries/hvCall.S		patch \| blob \| history
arch/powerpc/platforms/pseries/lpar.c		patch \| blob \| history
arch/sh/kernel/traps.c		patch \| blob \| history
arch/x86/boot/compressed/Makefile		patch \| blob \| history
arch/x86/boot/compressed/misc.c		patch \| blob \| history
arch/x86/boot/compressed/misc.h		patch \| blob \| history
arch/x86/boot/compressed/sev-es.c	[deleted file]	patch \| blob \| history
arch/x86/boot/compressed/sev.c	[new file with mode: 0644]	patch \| blob
arch/x86/include/asm/kvm_host.h		patch \| blob \| history
arch/x86/include/asm/kvm_para.h		patch \| blob \| history
arch/x86/include/asm/msr-index.h		patch \| blob \| history
arch/x86/include/asm/processor.h		patch \| blob \| history
arch/x86/include/asm/sev-common.h	[new file with mode: 0644]	patch \| blob
arch/x86/include/asm/sev-es.h	[deleted file]	patch \| blob \| history
arch/x86/include/asm/sev.h	[new file with mode: 0644]	patch \| blob
arch/x86/include/asm/vdso/clocksource.h		patch \| blob \| history
arch/x86/include/uapi/asm/kvm.h		patch \| blob \| history
arch/x86/kernel/Makefile		patch \| blob \| history
arch/x86/kernel/cpu/amd.c		patch \| blob \| history
arch/x86/kernel/cpu/mtrr/cleanup.c		patch \| blob \| history
arch/x86/kernel/cpu/mtrr/generic.c		patch \| blob \| history
arch/x86/kernel/head64.c		patch \| blob \| history
arch/x86/kernel/kvm.c		patch \| blob \| history
arch/x86/kernel/kvmclock.c		patch \| blob \| history
arch/x86/kernel/mmconf-fam10h_64.c		patch \| blob \| history
arch/x86/kernel/nmi.c		patch \| blob \| history
arch/x86/kernel/sev-es-shared.c	[deleted file]	patch \| blob \| history
arch/x86/kernel/sev-es.c	[deleted file]	patch \| blob \| history
arch/x86/kernel/sev-shared.c	[new file with mode: 0644]	patch \| blob
arch/x86/kernel/sev.c	[new file with mode: 0644]	patch \| blob
arch/x86/kernel/smpboot.c		patch \| blob \| history
arch/x86/kvm/cpuid.c		patch \| blob \| history
arch/x86/kvm/emulate.c		patch \| blob \| history
arch/x86/kvm/kvm_emulate.h		patch \| blob \| history
arch/x86/kvm/lapic.c		patch \| blob \| history
arch/x86/kvm/mmu/mmu.c		patch \| blob \| history
arch/x86/kvm/mmu/tdp_mmu.c		patch \| blob \| history
arch/x86/kvm/svm/nested.c		patch \| blob \| history
arch/x86/kvm/svm/sev.c		patch \| blob \| history
arch/x86/kvm/svm/svm.c		patch \| blob \| history
arch/x86/kvm/svm/svm.h		patch \| blob \| history
arch/x86/kvm/vmx/capabilities.h		patch \| blob \| history
arch/x86/kvm/vmx/nested.c		patch \| blob \| history
arch/x86/kvm/vmx/vmx.c		patch \| blob \| history
arch/x86/kvm/vmx/vmx.h		patch \| blob \| history
arch/x86/kvm/x86.c		patch \| blob \| history
arch/x86/mm/extable.c		patch \| blob \| history
arch/x86/mm/mem_encrypt_identity.c		patch \| blob \| history
arch/x86/pci/amd_bus.c		patch \| blob \| history
arch/x86/platform/efi/efi_64.c		patch \| blob \| history
arch/x86/realmode/init.c		patch \| blob \| history
arch/x86/realmode/rm/trampoline_64.S		patch \| blob \| history
block/bfq-iosched.c		patch \| blob \| history
block/blk-iocost.c		patch \| blob \| history
block/blk-mq-sched.c		patch \| blob \| history
block/blk-mq.c		patch \| blob \| history
block/kyber-iosched.c		patch \| blob \| history
block/mq-deadline.c		patch \| blob \| history
block/partitions/efi.c		patch \| blob \| history
drivers/acpi/device_pm.c		patch \| blob \| history
drivers/acpi/internal.h		patch \| blob \| history
drivers/acpi/nfit/core.c		patch \| blob \| history
drivers/acpi/power.c		patch \| blob \| history
drivers/acpi/scan.c		patch \| blob \| history
drivers/acpi/sleep.h		patch \| blob \| history
drivers/base/power/runtime.c		patch \| blob \| history
drivers/block/nbd.c		patch \| blob \| history
drivers/char/tpm/tpm2-cmd.c		patch \| blob \| history
drivers/char/tpm/tpm_tis_core.c		patch \| blob \| history
drivers/clocksource/hyperv_timer.c		patch \| blob \| history
drivers/cpufreq/acpi-cpufreq.c		patch \| blob \| history
drivers/cpufreq/intel_pstate.c		patch \| blob \| history
drivers/edac/amd64_edac.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/nv.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/soc15.c		patch \| blob \| history
drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c		patch \| blob \| history
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c		patch \| blob \| history
drivers/gpu/drm/amd/include/amd_shared.h		patch \| blob \| history
drivers/gpu/drm/amd/pm/powerplay/si_dpm.c		patch \| blob \| history
drivers/gpu/drm/amd/pm/powerplay/sislands_smc.h		patch \| blob \| history
drivers/gpu/drm/i915/display/intel_dp.c		patch \| blob \| history
drivers/gpu/drm/i915/display/intel_overlay.c		patch \| blob \| history
drivers/gpu/drm/i915/gem/i915_gem_mman.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/gen8_ppgtt.c		patch \| blob \| history
drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c		patch \| blob \| history
drivers/gpu/drm/i915/i915_active.c		patch \| blob \| history
drivers/gpu/drm/msm/adreno/a6xx_gpu.c		patch \| blob \| history
drivers/gpu/drm/msm/dp/dp_audio.c		patch \| blob \| history
drivers/gpu/drm/msm/dp/dp_display.c		patch \| blob \| history
drivers/gpu/drm/msm/dp/dp_display.h		patch \| blob \| history
drivers/gpu/drm/msm/dsi/phy/dsi_phy.c		patch \| blob \| history
drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c		patch \| blob \| history
drivers/gpu/drm/msm/msm_drv.c		patch \| blob \| history
drivers/gpu/drm/msm/msm_gem.c		patch \| blob \| history
drivers/gpu/drm/msm/msm_gem.h		patch \| blob \| history
drivers/gpu/drm/radeon/ni_dpm.c		patch \| blob \| history
drivers/gpu/drm/radeon/nislands_smc.h		patch \| blob \| history
drivers/gpu/drm/radeon/radeon.h		patch \| blob \| history
drivers/gpu/drm/radeon/radeon_pm.c		patch \| blob \| history
drivers/gpu/drm/radeon/si_dpm.c		patch \| blob \| history
drivers/gpu/drm/radeon/sislands_smc.h		patch \| blob \| history
drivers/gpu/drm/vc4/vc4_vec.c		patch \| blob \| history
drivers/hwmon/adm9240.c		patch \| blob \| history
drivers/hwmon/corsair-psu.c		patch \| blob \| history
drivers/hwmon/ltc2992.c		patch \| blob \| history
drivers/hwmon/occ/common.c		patch \| blob \| history
drivers/hwmon/occ/common.h		patch \| blob \| history
drivers/hwmon/pmbus/fsp-3y.c		patch \| blob \| history
drivers/nvme/host/core.c		patch \| blob \| history
drivers/nvme/host/multipath.c		patch \| blob \| history
drivers/nvme/host/nvme.h		patch \| blob \| history
drivers/nvme/target/admin-cmd.c		patch \| blob \| history
drivers/nvme/target/discovery.c		patch \| blob \| history
drivers/nvme/target/fabrics-cmd.c		patch \| blob \| history
drivers/nvme/target/io-cmd-bdev.c		patch \| blob \| history
drivers/nvme/target/io-cmd-file.c		patch \| blob \| history
drivers/nvme/target/nvmet.h		patch \| blob \| history
drivers/nvme/target/passthru.c		patch \| blob \| history
drivers/nvme/target/rdma.c		patch \| blob \| history
drivers/tty/vt/vt.c		patch \| blob \| history
drivers/tty/vt/vt_ioctl.c		patch \| blob \| history
drivers/video/console/vgacon.c		patch \| blob \| history
drivers/video/fbdev/core/fbcon.c		patch \| blob \| history
drivers/xen/gntdev.c		patch \| blob \| history
drivers/xen/swiotlb-xen.c		patch \| blob \| history
drivers/xen/unpopulated-alloc.c		patch \| blob \| history
fs/btrfs/ctree.h		patch \| blob \| history
fs/btrfs/extent-tree.c		patch \| blob \| history
fs/btrfs/file.c		patch \| blob \| history
fs/btrfs/free-space-cache.c		patch \| blob \| history
fs/btrfs/inode.c		patch \| blob \| history
fs/btrfs/ioctl.c		patch \| blob \| history
fs/btrfs/ordered-data.c		patch \| blob \| history
fs/btrfs/qgroup.c		patch \| blob \| history
fs/btrfs/send.c		patch \| blob \| history
fs/btrfs/tree-log.c		patch \| blob \| history
fs/btrfs/zoned.c		patch \| blob \| history
fs/dax.c		patch \| blob \| history
fs/erofs/zmap.c		patch \| blob \| history
fs/f2fs/compress.c		patch \| blob \| history
fs/f2fs/data.c		patch \| blob \| history
fs/f2fs/f2fs.h		patch \| blob \| history
fs/f2fs/file.c		patch \| blob \| history
fs/f2fs/segment.c		patch \| blob \| history
fs/hfsplus/extents.c		patch \| blob \| history
fs/hugetlbfs/inode.c		patch \| blob \| history
fs/io_uring.c		patch \| blob \| history
fs/iomap/buffered-io.c		patch \| blob \| history
fs/squashfs/file.c		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history
include/linux/console_struct.h		patch \| blob \| history
include/linux/elevator.h		patch \| blob \| history
include/linux/libnvdimm.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
include/linux/mm_types.h		patch \| blob \| history
include/linux/pagemap.h		patch \| blob \| history
include/linux/pm.h		patch \| blob \| history
include/linux/randomize_kstack.h		patch \| blob \| history
include/net/page_pool.h		patch \| blob \| history
include/uapi/linux/fs.h		patch \| blob \| history
include/xen/arm/swiotlb-xen.h		patch \| blob \| history
kernel/ptrace.c		patch \| blob \| history
kernel/resource.c		patch \| blob \| history
kernel/sched/fair.c		patch \| blob \| history
kernel/time/alarmtimer.c		patch \| blob \| history
kernel/trace/trace.c		patch \| blob \| history
lib/test_kasan.c		patch \| blob \| history
mm/hugetlb.c		patch \| blob \| history
mm/ioremap.c		patch \| blob \| history
mm/ksm.c		patch \| blob \| history
mm/shmem.c		patch \| blob \| history
mm/slab_common.c		patch \| blob \| history
mm/slub.c		patch \| blob \| history
net/core/page_pool.c		patch \| blob \| history
security/keys/trusted-keys/trusted_tpm1.c		patch \| blob \| history
security/keys/trusted-keys/trusted_tpm2.c		patch \| blob \| history
tools/arch/powerpc/include/uapi/asm/errno.h		patch \| blob \| history
tools/arch/x86/include/asm/cpufeatures.h		patch \| blob \| history
tools/arch/x86/include/asm/msr-index.h		patch \| blob \| history
tools/arch/x86/include/uapi/asm/vmx.h		patch \| blob \| history
tools/arch/x86/lib/memcpy_64.S		patch \| blob \| history
tools/arch/x86/lib/memset_64.S		patch \| blob \| history
tools/include/asm/alternative-asm.h	[deleted file]	patch \| blob \| history
tools/include/asm/alternative.h	[new file with mode: 0644]	patch \| blob
tools/include/uapi/asm-generic/unistd.h		patch \| blob \| history
tools/include/uapi/drm/drm.h		patch \| blob \| history
tools/include/uapi/drm/i915_drm.h		patch \| blob \| history
tools/include/uapi/linux/kvm.h		patch \| blob \| history
tools/include/uapi/linux/perf_event.h		patch \| blob \| history
tools/include/uapi/linux/prctl.h		patch \| blob \| history
tools/kvm/kvm_stat/kvm_stat.txt		patch \| blob \| history
tools/objtool/arch/x86/decode.c		patch \| blob \| history
tools/objtool/elf.c		patch \| blob \| history
tools/perf/Makefile.config		patch \| blob \| history
tools/perf/arch/arm64/util/kvm-stat.c		patch \| blob \| history
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl		patch \| blob \| history
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl		patch \| blob \| history
tools/perf/arch/s390/entry/syscalls/syscall.tbl		patch \| blob \| history
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl		patch \| blob \| history
tools/perf/pmu-events/jevents.c		patch \| blob \| history
tools/perf/tests/attr/base-record		patch \| blob \| history
tools/perf/tests/attr/base-stat		patch \| blob \| history
tools/perf/tests/attr/system-wide-dummy		patch \| blob \| history
tools/perf/util/Build		patch \| blob \| history
tools/perf/util/record.c		patch \| blob \| history
tools/perf/util/session.c		patch \| blob \| history
tools/testing/nvdimm/test/iomap.c		patch \| blob \| history
tools/testing/nvdimm/test/nfit.c		patch \| blob \| history
tools/testing/selftests/arm64/bti/test.c		patch \| blob \| history
tools/testing/selftests/kvm/lib/x86_64/handlers.S		patch \| blob \| history
tools/testing/selftests/kvm/x86_64/evmcs_test.c		patch \| blob \| history
virt/kvm/kvm_main.c		patch \| blob \| history