Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Mar 2021 01:27:59 +0000 (17:27 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 6 Mar 2021 01:27:59 +0000 (17:27 -0800)
Pull rdma fixes from Jason Gunthorpe:
 "Nothing special here, though Bob's regression fixes for rxe would have
  made it before the rc cycle had there not been such strong winter
  weather!

   - Fix corner cases in the rxe reference counting cleanup that are
     causing regressions in blktests for SRP

   - Two kdoc fixes so W=1 is clean

   - Missing error return in error unwind for mlx5

   - Wrong lock type nesting in IB CM"

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma:
  RDMA/rxe: Fix errant WARN_ONCE in rxe_completer()
  RDMA/rxe: Fix extra deref in rxe_rcv_mcast_pkt()
  RDMA/rxe: Fix missed IB reference counting in loopback
  RDMA/uverbs: Fix kernel-doc warning of _uverbs_alloc
  RDMA/mlx5: Set correct kernel-doc identifier
  IB/mlx5: Add missing error code
  RDMA/rxe: Fix missing kconfig dependency on CRYPTO
  RDMA/cm: Fix IRQ restore in ib_send_cm_sidr_rep

157 files changed:
Documentation/block/bfq-iosched.rst
Documentation/devicetree/bindings/display/brcm,bcm2711-hdmi.yaml
Documentation/devicetree/bindings/media/i2c/imx258.yaml
Documentation/devicetree/bindings/media/i2c/ovti,ov5647.yaml
Documentation/devicetree/bindings/media/i2c/ovti,ov5648.yaml
Documentation/devicetree/bindings/media/i2c/ovti,ov8865.yaml
Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml
Documentation/virt/kvm/api.rst
arch/arm/xen/p2m.c
arch/ia64/kernel/signal.c
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/xen/page.h
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/hyperv.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/svm/svm.c
arch/x86/kvm/x86.c
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
arch/x86/xen/p2m.c
arch/x86/xen/setup.c
block/bfq-iosched.c
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/genhd.c
block/partitions/core.c
drivers/base/power/runtime.c
drivers/block/rsxx/core.c
drivers/block/rsxx/rsxx_priv.h
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_tis_core.c
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
drivers/iommu/amd/io_pgtable.c
drivers/iommu/dma-iommu.c
drivers/iommu/intel/pasid.h
drivers/iommu/tegra-smmu.c
drivers/md/dm-bufio.c
drivers/md/dm-verity-fec.c
drivers/net/xen-netback/netback.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/hwmon.c
drivers/nvme/host/pci.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/configfs.c
drivers/nvme/target/core.c
drivers/nvme/target/nvmet.h
drivers/powercap/Kconfig
drivers/powercap/dtpm.c
drivers/scsi/libiscsi.c
drivers/scsi/scsi_transport_iscsi.c
drivers/soundwire/intel.h
drivers/soundwire/intel_init.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/compression.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/raid56.c
fs/btrfs/ref-verify.c
fs/btrfs/reflink.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tree-checker.c
fs/btrfs/tree-log.c
fs/btrfs/xattr.c
fs/btrfs/zlib.c
fs/btrfs/zoned.c
fs/btrfs/zstd.c
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/pstore/inode.c
fs/pstore/ram_core.c
include/linux/acpi.h
include/linux/blkdev.h
include/linux/highmem.h
include/linux/io_uring.h
include/linux/sched/task.h
include/linux/soundwire/sdw_intel.h
include/linux/swap.h
include/linux/trace_events.h
include/sound/intel-nhlt.h
include/sound/soc-acpi.h
include/uapi/linux/kvm.h
kernel/fork.c
kernel/trace/Kconfig
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_events_synth.c
lib/iov_iter.c
mm/page_io.c
mm/swapfile.c
scripts/dtc/include-prefixes/c6x [deleted symlink]
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/gcc-plugins/structleak_plugin.c
scripts/recordmcount.c
sound/hda/Kconfig
sound/hda/Makefile
sound/hda/ext/hdac_ext_controller.c
sound/hda/ext/hdac_ext_stream.c
sound/hda/hdac_regmap.c
sound/hda/intel-dsp-config.c
sound/hda/intel-nhlt.c
sound/hda/intel-sdw-acpi.c [new file with mode: 0644]
sound/mips/snd-n64.c
sound/pci/ctxfi/cthw20k2.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_jack.c
sound/pci/hda/patch_ca0132.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/sof/Kconfig
sound/soc/sof/Makefile
sound/soc/sof/intel/Kconfig
sound/soc/sof/intel/Makefile
sound/soc/sof/intel/bdw.c
sound/soc/sof/intel/byt.c
sound/soc/sof/intel/hda.c
sound/soc/sof/intel/hda.h
sound/soc/sof/intel/pci-apl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-cnl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-icl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-tgl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-tng.c [new file with mode: 0644]
sound/soc/sof/intel/shim.h
sound/soc/sof/sof-acpi-dev.c
sound/soc/sof/sof-acpi-dev.h [new file with mode: 0644]
sound/soc/sof/sof-pci-dev.c
sound/soc/sof/sof-pci-dev.h [new file with mode: 0644]
sound/usb/clock.c
sound/usb/mixer.c
sound/usb/mixer_maps.c
sound/usb/pcm.c
sound/usb/quirks.c
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c

index 19d4d15..66c5a4e 100644 (file)
@@ -430,13 +430,13 @@ fifo_expire_async
 -----------------
 
 This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
+value of this is 250ms.
 
 fifo_expire_sync
 ----------------
 
 This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
+value of this is 125ms. To favor synchronous requests over the asynchronous
 one, this value should be decreased relative to fifo_expire_async.
 
 low_latency
index eaf7786..515317e 100644 (file)
@@ -49,10 +49,14 @@ properties:
 
   # See ../video-interfaces.txt for more details
   port:
-    type: object
+    $ref: /schemas/graph.yaml#/properties/port
+    additionalProperties: false
+
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
+
         properties:
           data-lanes:
             oneOf:
@@ -65,11 +69,7 @@ properties:
                   - const: 1
                   - const: 2
 
-          link-frequencies:
-            allOf:
-              - $ref: /schemas/types.yaml#/definitions/uint64-array
-            description:
-              Allowed data bus frequencies.
+          link-frequencies: true
 
         required:
           - data-lanes
index 1ab22e7..3e5d82d 100644 (file)
@@ -31,7 +31,8 @@ properties:
     maxItems: 1
 
   port:
-    $ref: /schemas/graph.yaml#/$defs/port-base
+    $ref: /schemas/graph.yaml#/properties/port
+    additionalProperties: false
 
     properties:
       endpoint:
@@ -41,8 +42,6 @@ properties:
         properties:
           clock-noncontinuous: true
 
-    additionalProperties: false
-
 required:
   - compatible
   - reg
index f8783f7..9149f56 100644 (file)
@@ -44,19 +44,17 @@ properties:
     description: Reset Pin GPIO Control (active low)
 
   port:
-    type: object
     description: MIPI CSI-2 transmitter port
+    $ref: /schemas/graph.yaml#/properties/port
+    additionalProperties: false
 
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
 
         properties:
-          remote-endpoint: true
-
-          link-frequencies:
-            $ref: /schemas/types.yaml#/definitions/uint64-array
-            description: Allowed MIPI CSI-2 link frequencies
+          link-frequencies: true
 
           data-lanes:
             minItems: 1
@@ -65,10 +63,6 @@ properties:
         required:
           - data-lanes
           - link-frequencies
-          - remote-endpoint
-
-    required:
-      - endpoint
 
 required:
   - compatible
index c0ba28a..0699c7e 100644 (file)
@@ -44,19 +44,17 @@ properties:
     description: Reset Pin GPIO Control (active low)
 
   port:
-    type: object
     description: MIPI CSI-2 transmitter port
+    $ref: /schemas/graph.yaml#/properties/port
+    additionalProperties: false
 
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
 
         properties:
-          remote-endpoint: true
-
-          link-frequencies:
-            $ref: /schemas/types.yaml#/definitions/uint64-array
-            description: Allowed MIPI CSI-2 link frequencies
+          link-frequencies: true
 
           data-lanes:
             minItems: 1
@@ -65,10 +63,6 @@ properties:
         required:
           - data-lanes
           - link-frequencies
-          - remote-endpoint
-
-    required:
-      - endpoint
 
 required:
   - compatible
index 24e6893..27cc5b7 100644 (file)
@@ -36,18 +36,17 @@ properties:
     description: Reference to the GPIO connected to the XCLR pin, if any.
 
   port:
-    type: object
     additionalProperties: false
     $ref: /schemas/graph.yaml#/properties/port
 
     properties:
       endpoint:
-        type: object
+        $ref: /schemas/media/video-interfaces.yaml#
+        unevaluatedProperties: false
+
         properties:
-          data-lanes:
-            $ref: ../video-interfaces.yaml#/properties/data-lanes
-          link-frequencies:
-            $ref: ../video-interfaces.yaml#/properties/link-frequencies
+          data-lanes: true
+          link-frequencies: true
 
         required:
           - data-lanes
index aed52b0..1a2b521 100644 (file)
@@ -3856,49 +3856,20 @@ base 2 of the page size in the bottom 6 bits.
          -EFAULT if struct kvm_reinject_control cannot be read,
         -EINVAL if the supplied shift or flags are invalid,
         -ENOMEM if unable to allocate the new HPT,
-        -ENOSPC if there was a hash collision
-
-::
-
-  struct kvm_ppc_rmmu_info {
-       struct kvm_ppc_radix_geom {
-               __u8    page_shift;
-               __u8    level_bits[4];
-               __u8    pad[3];
-       }       geometries[8];
-       __u32   ap_encodings[8];
-  };
-
-The geometries[] field gives up to 8 supported geometries for the
-radix page table, in terms of the log base 2 of the smallest page
-size, and the number of bits indexed at each level of the tree, from
-the PTE level up to the PGD level in that order.  Any unused entries
-will have 0 in the page_shift field.
-
-The ap_encodings gives the supported page sizes and their AP field
-encodings, encoded with the AP value in the top 3 bits and the log
-base 2 of the page size in the bottom 6 bits.
-
-4.102 KVM_PPC_RESIZE_HPT_PREPARE
---------------------------------
-
-:Capability: KVM_CAP_SPAPR_RESIZE_HPT
-:Architectures: powerpc
-:Type: vm ioctl
-:Parameters: struct kvm_ppc_resize_hpt (in)
-:Returns: 0 on successful completion,
-        >0 if a new HPT is being prepared, the value is an estimated
-         number of milliseconds until preparation is complete,
-         -EFAULT if struct kvm_reinject_control cannot be read,
-        -EINVAL if the supplied shift or flags are invalid,when moving existing
-         HPT entries to the new HPT,
-        -EIO on other error conditions
 
 Used to implement the PAPR extension for runtime resizing of a guest's
 Hashed Page Table (HPT).  Specifically this starts, stops or monitors
 the preparation of a new potential HPT for the guest, essentially
 implementing the H_RESIZE_HPT_PREPARE hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 If called with shift > 0 when there is no pending HPT for the guest,
 this begins preparation of a new pending HPT of size 2^(shift) bytes.
 It then returns a positive integer with the estimated number of
@@ -3926,14 +3897,6 @@ Normally this will be called repeatedly with the same parameters until
 it returns <= 0.  The first call will initiate preparation, subsequent
 ones will monitor preparation until it completes or fails.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
 4.103 KVM_PPC_RESIZE_HPT_COMMIT
 -------------------------------
 
@@ -3956,6 +3919,14 @@ Hashed Page Table (HPT).  Specifically this requests that the guest be
 transferred to working with the new HPT, essentially implementing the
 H_RESIZE_HPT_COMMIT hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
 returned 0 with the same parameters.  In other cases
 KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
@@ -3971,14 +3942,6 @@ HPT and the previous HPT will be discarded.
 
 On failure, the guest will still be operating on its previous HPT.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
 -----------------------------------
 
@@ -4915,6 +4878,14 @@ see KVM_XEN_HVM_SET_ATTR above.
        union {
                __u64 gpa;
                __u64 pad[4];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
   };
 
@@ -4927,6 +4898,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
   Sets the guest physical address of an additional pvclock structure
   for a given vCPU. This is typically used for guest vsyscall support.
 
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
+  Sets the guest physical address of the vcpu_runstate_info for a given
+  vCPU. This is how a Xen guest tracks CPU state such as steal time.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
+  Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
+  the given vCPU from the .u.runstate.state member of the structure.
+  KVM automatically accounts running and runnable time but blocked
+  and offline states are only entered explicitly.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
+  Sets all fields of the vCPU runstate data from the .u.runstate member
+  of the structure, including the current runstate. The state_entry_time
+  must equal the sum of the other four times.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
+  This *adds* the contents of the .u.runstate members of the structure
+  to the corresponding members of the given vCPU's runstate data, thus
+  permitting atomic adjustments to the runstate times. The adjustment
+  to the state_entry_time must equal the sum of the adjustments to the
+  other four times. The state field must be set to -1, or to a valid
+  runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
+  or RUNSTATE_offline) to set the current accounted state as of the
+  adjusted state_entry_time.
+
 4.130 KVM_XEN_VCPU_GET_ATTR
 ---------------------------
 
@@ -4939,6 +4935,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
 Allows Xen vCPU attributes to be read. For the structure and types,
 see KVM_XEN_VCPU_SET_ATTR above.
 
+The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
+with the KVM_XEN_VCPU_GET_ATTR ioctl.
+
 5. The kvm_run structure
 ========================
 
@@ -5000,7 +4999,8 @@ local APIC is not used.
        __u16 flags;
 
 More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior. Current defined flags:
+affect the device's behavior. Currently defined flags::
+
   /* x86, set if the VCPU is in system management mode */
   #define KVM_RUN_X86_SMM     (1 << 0)
   /* x86, set if bus lock detected in VM */
@@ -6217,7 +6217,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
 notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
 KVM_RUN_BUS_LOCK flag is used to distinguish between them.
 
-7.22 KVM_CAP_PPC_DAWR1
+7.23 KVM_CAP_PPC_DAWR1
 ----------------------
 
 :Architectures: ppc
@@ -6702,6 +6702,7 @@ PVHVM guests. Valid flags are::
   #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR     (1 << 0)
   #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL   (1 << 1)
   #define KVM_XEN_HVM_CONFIG_SHARED_INFO       (1 << 2)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 3)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -6716,3 +6717,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
 KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
 for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
 vcpu_info is set.
+
+The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
+features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
+supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
index fd6e3aa..acb4645 100644 (file)
@@ -93,12 +93,39 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
        int i;
 
        for (i = 0; i < count; i++) {
+               struct gnttab_unmap_grant_ref unmap;
+               int rc;
+
                if (map_ops[i].status)
                        continue;
-               if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
-                                   map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
-                       return -ENOMEM;
-               }
+               if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+                                   map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT)))
+                       continue;
+
+               /*
+                * Signal an error for this slot. This in turn requires
+                * immediate unmapping.
+                */
+               map_ops[i].status = GNTST_general_error;
+               unmap.host_addr = map_ops[i].host_addr,
+               unmap.handle = map_ops[i].handle;
+               map_ops[i].handle = ~0;
+               if (map_ops[i].flags & GNTMAP_device_map)
+                       unmap.dev_bus_addr = map_ops[i].dev_bus_addr;
+               else
+                       unmap.dev_bus_addr = 0;
+
+               /*
+                * Pre-populate the status field, to be recognizable in
+                * the log message below.
+                */
+               unmap.status = 1;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                              &unmap, 1);
+               if (rc || unmap.status != GNTST_okay)
+                       pr_err_once("gnttab unmap failed: rc=%d st=%d\n",
+                                   rc, unmap.status);
        }
 
        return 0;
index e67b22f..c1b2997 100644 (file)
@@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
         * need to push through a forced SIGSEGV.
         */
        while (1) {
-               get_signal(&ksig);
+               if (!get_signal(&ksig))
+                       break;
 
                /*
                 * get_signal() may have run a debugger (via notify_parent())
index 0cf71ff..877a402 100644 (file)
@@ -535,10 +535,16 @@ struct kvm_vcpu_hv {
 /* Xen HVM per vcpu emulation context */
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
+       u32 current_runstate;
        bool vcpu_info_set;
        bool vcpu_time_info_set;
+       bool runstate_set;
        struct gfn_to_hva_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache;
+       u64 last_steal;
+       u64 runstate_entry_time;
+       u64 runstate_times[4];
 };
 
 struct kvm_vcpu_arch {
@@ -939,9 +945,6 @@ struct kvm_arch {
        unsigned int indirect_shadow_pages;
        u8 mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-       /*
-        * Hash table of struct kvm_mmu_page.
-        */
        struct list_head active_mmu_pages;
        struct list_head zapped_obsolete_pages;
        struct list_head lpage_disallowed_mmu_pages;
index 1a162e5..7068e4b 100644 (file)
@@ -86,6 +86,18 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 }
 #endif
 
+/*
+ * The maximum amount of extra memory compared to the base size.  The
+ * main scaling factor is the size of struct page.  At extreme ratios
+ * of base:extra, all the base memory can be filled with page
+ * structures for the extra memory, leaving no space for anything
+ * else.
+ *
+ * 10x seems like a reasonable balance between scaling flexibility and
+ * leaving a practically usable system.
+ */
+#define XEN_EXTRA_MEM_RATIO    (10)
+
 /*
  * Helper functions to write or read unsigned long values to/from
  * memory, when the access may fault.
index 7ac5926..a788d51 100644 (file)
@@ -103,6 +103,15 @@ config KVM_AMD_SEV
          Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
          with Encrypted State (SEV-ES) on AMD processors.
 
+config KVM_XEN
+       bool "Support for Xen hypercall interface"
+       depends on KVM
+       help
+         Provides KVM support for hosting Xen HVM guests and
+         passing Xen hypercalls to userspace.
+
+         If in doubt, say "N".
+
 config KVM_MMU_AUDIT
        bool "Audit KVM MMU"
        depends on KVM && TRACEPOINTS
index aeab168..1b4766f 100644 (file)
@@ -14,11 +14,12 @@ kvm-y                       += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
                                $(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
-kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
+kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
                           i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
                           hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
                           mmu/spte.o
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_XEN)  += xen.o
 
 kvm-intel-y            += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
                           vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
index 7d2dae9..58fa8c0 100644 (file)
@@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
        struct kvm_vcpu_hv_synic *synic;
 
        vcpu = get_vcpu_by_vpidx(kvm, vpidx);
-       if (!vcpu)
+       if (!vcpu || !to_hv_vcpu(vcpu))
                return NULL;
        synic = to_hv_synic(vcpu);
        return (synic->active) ? synic : NULL;
index 72b0928..ec4fc28 100644 (file)
@@ -81,15 +81,15 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
 static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
 {
        /*
-        * When using the EPT page-modification log, the GPAs in the log
-        * would come from L2 rather than L1.  Therefore, we need to rely
-        * on write protection to record dirty pages.  This also bypasses
-        * PML, since writes now result in a vmexit.  Note, this helper will
-        * tag SPTEs as needing write-protection even if PML is disabled or
-        * unsupported, but that's ok because the tag is consumed if and only
-        * if PML is enabled.  Omit the PML check to save a few uops.
+        * When using the EPT page-modification log, the GPAs in the CPU dirty
+        * log would come from L2 rather than L1.  Therefore, we need to rely
+        * on write protection to record dirty pages, which bypasses PML, since
+        * writes now result in a vmexit.  Note, the check on CPU dirty logging
+        * being enabled is mandatory as the bits used to denote WP-only SPTEs
+        * are reserved for NPT w/ PAE (32-bit KVM).
         */
-       return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+       return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
+              kvm_x86_ops.cpu_dirty_log_size;
 }
 
 bool is_nx_huge_page_enabled(void);
index c636021..baee91c 100644 (file)
@@ -1200,6 +1200,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
+       svm_set_cr4(&svm->vcpu, 0);
        svm_set_efer(&svm->vcpu, 0);
        save->dr6 = 0xffff0ff0;
        kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
index 3712bb5..2a20ce6 100644 (file)
@@ -2957,6 +2957,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (kvm_xen_msr_enabled(vcpu->kvm)) {
+               kvm_xen_runstate_set_running(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -3756,11 +3761,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
                r = 1;
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_CAP_XEN_HVM:
                r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO;
+               if (sched_info_on())
+                       r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
                break;
+#endif
        case KVM_CAP_SYNC_REGS:
                r = KVM_SYNC_X86_VALID_FIELDS;
                break;
@@ -4038,7 +4047,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
-       kvm_steal_time_set_preempted(vcpu);
+       if (kvm_xen_msr_enabled(vcpu->kvm))
+               kvm_xen_runstate_set_preempted(vcpu);
+       else
+               kvm_steal_time_set_preempted(vcpu);
+
        static_call(kvm_x86_vcpu_put)(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
        /*
@@ -5013,6 +5026,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_GET_SUPPORTED_HV_CPUID:
                r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_VCPU_GET_ATTR: {
                struct kvm_xen_vcpu_attr xva;
 
@@ -5033,6 +5047,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_xen_vcpu_set_attr(vcpu, &xva);
                break;
        }
+#endif
        default:
                r = -EINVAL;
        }
@@ -5654,6 +5669,7 @@ set_pit2_out:
                        kvm->arch.bsp_vcpu_id = arg;
                mutex_unlock(&kvm->lock);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_HVM_CONFIG: {
                struct kvm_xen_hvm_config xhc;
                r = -EFAULT;
@@ -5682,6 +5698,7 @@ set_pit2_out:
                r = kvm_xen_hvm_set_attr(kvm, &xha);
                break;
        }
+#endif
        case KVM_SET_CLOCK: {
                struct kvm_clock_data user_ns;
                u64 now_ns;
@@ -8040,7 +8057,10 @@ void kvm_arch_exit(void)
        kvm_mmu_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_fpu_cache);
+#ifdef CONFIG_KVM_XEN
+       static_key_deferred_flush(&kvm_xen_enabled);
        WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
+#endif
 }
 
 static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
index af8f656..ae17250 100644 (file)
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -61,6 +63,132 @@ out:
        return ret;
 }
 
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       u64 now = get_kvmclock_ns(v->kvm);
+       u64 delta_ns = now - vx->runstate_entry_time;
+       u64 run_delay = current->sched_info.run_delay;
+
+       if (unlikely(!vx->runstate_entry_time))
+               vx->current_runstate = RUNSTATE_offline;
+
+       /*
+        * Time waiting for the scheduler isn't "stolen" if the
+        * vCPU wasn't running anyway.
+        */
+       if (vx->current_runstate == RUNSTATE_running) {
+               u64 steal_ns = run_delay - vx->last_steal;
+
+               delta_ns -= steal_ns;
+
+               vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+       }
+       vx->last_steal = run_delay;
+
+       vx->runstate_times[vx->current_runstate] += delta_ns;
+       vx->current_runstate = state;
+       vx->runstate_entry_time = now;
+}
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       uint64_t state_entry_time;
+       unsigned int offset;
+
+       kvm_xen_update_runstate(v, state);
+
+       if (!vx->runstate_set)
+               return;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+       offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference is alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed directly using
+        * offsetof() (where its offset happens to be zero), while the
+        * remaining fields which are all uint64_t, start at 'offset'
+        * which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       if (v->kvm->arch.xen.long_mode)
+               offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * First write the updated state_entry_time at the appropriate
+        * location determined by 'offset'.
+        */
+       state_entry_time = vx->runstate_entry_time;
+       state_entry_time |= XEN_RUNSTATE_UPDATE;
+
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+       smp_wmb();
+
+       /*
+        * Next, write the new runstate. This is in the *same* place
+        * for 32-bit and 64-bit guests, asserted here for paranoia.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->current_runstate,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(vx->current_runstate)))
+               return;
+
+       /*
+        * Write the actual runstate times immediately after the
+        * runstate_entry_time.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(vx->runstate_times));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->runstate_times[0],
+                                         offset + sizeof(u64),
+                                         sizeof(vx->runstate_times)))
+               return;
+
+       smp_wmb();
+
+       /*
+        * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+        * runstate_entry_time field.
+        */
+
+       state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+}
+
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        u8 rc = 0;
@@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                /* No compat necessary here. */
                BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                             sizeof(struct compat_vcpu_info));
+               BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+                            offsetof(struct compat_vcpu_info, time));
 
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_time_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.gpa == GPA_INVALID) {
+                       vcpu->arch.xen.runstate_set = false;
+                       r = 0;
+                       break;
+               }
+
+               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                                             &vcpu->arch.xen.runstate_cache,
+                                             data->u.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (!r) {
+                       vcpu->arch.xen.runstate_set = true;
+               }
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   data->u.runstate.state_entry_time) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.current_runstate = data->u.runstate.state;
+               vcpu->arch.xen.runstate_entry_time =
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] =
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
+                       data->u.runstate.time_offline;
+               vcpu->arch.xen.last_steal = current->sched_info.run_delay;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline &&
+                   data->u.runstate.state != (u64)-1) {
+                       r = -EINVAL;
+                       break;
+               }
+               /* The adjustment must add up */
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   (vcpu->arch.xen.runstate_entry_time +
+                    data->u.runstate.state_entry_time)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.runstate_entry_time +=
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
+                       data->u.runstate.time_offline;
+
+               if (data->u.runstate.state <= RUNSTATE_offline)
+                       kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
        default:
                break;
        }
@@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                r = 0;
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (vcpu->arch.xen.runstate_set) {
+                       data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               data->u.runstate.state_entry_time =
+                       vcpu->arch.xen.runstate_entry_time;
+               data->u.runstate.time_running =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_running];
+               data->u.runstate.time_runnable =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
+               data->u.runstate.time_blocked =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
+               data->u.runstate.time_offline =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_offline];
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               r = -EINVAL;
+               break;
+
        default:
                break;
        }
index b66a921..463a784 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+#ifdef CONFIG_KVM_XEN
 #include <linux/jump_label_ratelimit.h>
 
 extern struct static_key_false_deferred kvm_xen_enabled;
@@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
-int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
 void kvm_xen_destroy_vm(struct kvm *kvm);
 
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return static_branch_unlikely(&kvm_xen_enabled.key) &&
+               kvm->arch.xen_hvm_config.msr;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
        return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 
        return 0;
 }
+#else
+static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
+{
+       return 1;
+}
+
+static inline void kvm_xen_destroy_vm(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+#endif
+
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 
-/* 32-bit compatibility definitions, also used natively in 32-bit build */
 #include <asm/pvclock-abi.h>
 #include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
 
+static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
+{
+       kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
+}
+
+static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+       /*
+        * If the vCPU wasn't preempted but took a normal exit for
+        * some reason (hypercalls, I/O, etc.), that is accounted as
+        * still RUNSTATE_running, as the VMM is still operating on
+        * behalf of the vCPU. Only if the VMM does actually block
+        * does it need to enter RUNSTATE_blocked.
+        */
+       if (vcpu->preempted)
+               kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+}
+
+/* 32-bit compatibility definitions, also used natively in 32-bit build */
 struct compat_arch_vcpu_info {
        unsigned int cr2;
        unsigned int pad[5];
@@ -75,4 +129,10 @@ struct compat_shared_info {
        struct compat_arch_shared_info arch;
 };
 
+struct compat_vcpu_runstate_info {
+    int state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
index b5949e5..a3cc330 100644 (file)
@@ -416,6 +416,9 @@ void __init xen_vmalloc_p2m_tree(void)
        xen_p2m_last_pfn = xen_max_p2m_pfn;
 
        p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE;
+       if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC))
+               p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO;
+
        vm.flags = VM_ALLOC;
        vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit),
                        PMD_SIZE * PMDS_PER_MID_PAGE);
@@ -652,10 +655,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
        pte_t *ptep;
        unsigned int level;
 
-       if (unlikely(pfn >= xen_p2m_size)) {
-               BUG_ON(mfn != INVALID_P2M_ENTRY);
-               return true;
-       }
+       /* Only invalid entries allowed above the highest p2m covered frame. */
+       if (unlikely(pfn >= xen_p2m_size))
+               return mfn == INVALID_P2M_ENTRY;
 
        /*
         * The interface requires atomic updates on p2m elements.
@@ -710,6 +712,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 
        for (i = 0; i < count; i++) {
                unsigned long mfn, pfn;
+               struct gnttab_unmap_grant_ref unmap[2];
+               int rc;
 
                /* Do not add to override if the map failed. */
                if (map_ops[i].status != GNTST_okay ||
@@ -727,10 +731,46 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 
                WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
 
-               if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
-                       ret = -ENOMEM;
-                       goto out;
+               if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+                       continue;
+
+               /*
+                * Signal an error for this slot. This in turn requires
+                * immediate unmapping.
+                */
+               map_ops[i].status = GNTST_general_error;
+               unmap[0].host_addr = map_ops[i].host_addr,
+               unmap[0].handle = map_ops[i].handle;
+               map_ops[i].handle = ~0;
+               if (map_ops[i].flags & GNTMAP_device_map)
+                       unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
+               else
+                       unmap[0].dev_bus_addr = 0;
+
+               if (kmap_ops) {
+                       kmap_ops[i].status = GNTST_general_error;
+                       unmap[1].host_addr = kmap_ops[i].host_addr,
+                       unmap[1].handle = kmap_ops[i].handle;
+                       kmap_ops[i].handle = ~0;
+                       if (kmap_ops[i].flags & GNTMAP_device_map)
+                               unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
+                       else
+                               unmap[1].dev_bus_addr = 0;
                }
+
+               /*
+                * Pre-populate both status fields, to be recognizable in
+                * the log message below.
+                */
+               unmap[0].status = 1;
+               unmap[1].status = 1;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                              unmap, 1 + !!kmap_ops);
+               if (rc || unmap[0].status != GNTST_okay ||
+                   unmap[1].status != GNTST_okay)
+                       pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
+                                   rc, unmap[0].status, unmap[1].status);
        }
 
 out:
index 7eab14d..1a3b756 100644 (file)
@@ -59,18 +59,6 @@ static struct {
 } xen_remap_buf __initdata __aligned(PAGE_SIZE);
 static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 
-/* 
- * The maximum amount of extra memory compared to the base size.  The
- * main scaling factor is the size of struct page.  At extreme ratios
- * of base:extra, all the base memory can be filled with page
- * structures for the extra memory, leaving no space for anything
- * else.
- * 
- * 10x seems like a reasonable balance between scaling flexibility and
- * leaving a practically usable system.
- */
-#define EXTRA_MEM_RATIO                (10)
-
 static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB);
 
 static void __init xen_parse_512gb(void)
@@ -790,20 +778,13 @@ char * __init xen_memory_setup(void)
                extra_pages += max_pages - max_pfn;
 
        /*
-        * Clamp the amount of extra memory to a EXTRA_MEM_RATIO
-        * factor the base size.  On non-highmem systems, the base
-        * size is the full initial memory allocation; on highmem it
-        * is limited to the max size of lowmem, so that it doesn't
-        * get completely filled.
+        * Clamp the amount of extra memory to a XEN_EXTRA_MEM_RATIO
+        * factor the base size.
         *
         * Make sure we have no memory above max_pages, as this area
         * isn't handled by the p2m management.
-        *
-        * In principle there could be a problem in lowmem systems if
-        * the initial memory is also very large with respect to
-        * lowmem, but we won't try to deal with that here.
         */
-       extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
+       extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
                           extra_pages, max_pages - max_pfn);
        i = 0;
        addr = xen_e820_table.entries[0].addr;
index ec482e6..9558613 100644 (file)
@@ -162,7 +162,7 @@ BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
 #undef BFQ_BFQQ_FNS                                            \
 
-/* Expiration time of sync (0) and async (1) requests, in ns. */
+/* Expiration time of async (0) and sync (1) requests, in ns. */
 static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
 
 /* Maximum backwards seek (magic number lifted from CFQ), in KiB. */
index 4de03da..9ebb344 100644 (file)
@@ -292,7 +292,6 @@ static const char *const cmd_flag_name[] = {
 
 #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
 static const char *const rqf_name[] = {
-       RQF_NAME(SORTED),
        RQF_NAME(STARTED),
        RQF_NAME(SOFTBARRIER),
        RQF_NAME(FLUSH_SEQ),
index ddb65e9..e1e997a 100644 (file)
@@ -385,7 +385,6 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
-                                      bool has_sched,
                                       struct request *rq)
 {
        /*
@@ -402,9 +401,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
        if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
                return true;
 
-       if (has_sched)
-               rq->rq_flags |= RQF_SORTED;
-
        return false;
 }
 
@@ -418,7 +414,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 
        WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
 
-       if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+       if (blk_mq_sched_bypass_insert(hctx, rq)) {
                /*
                 * Firstly normal IO request is inserted to scheduler queue or
                 * sw queue, meantime we add flush request to dispatch queue(
index fcc5301..c55e8f0 100644 (file)
@@ -45,11 +45,10 @@ static void disk_release_events(struct gendisk *disk);
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
        struct block_device *bdev = disk->part0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&bdev->bd_size_lock, flags);
+       spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-       spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+       spin_unlock(&bdev->bd_size_lock);
 }
 EXPORT_SYMBOL(set_capacity);
 
index f3d9ff2..1a75589 100644 (file)
@@ -88,11 +88,9 @@ static int (*check_part[])(struct parsed_partitions *) = {
 
 static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&bdev->bd_size_lock, flags);
+       spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-       spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+       spin_unlock(&bdev->bd_size_lock);
 }
 
 static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
index a46a7e3..18b8242 100644 (file)
@@ -325,22 +325,22 @@ static void rpm_put_suppliers(struct device *dev)
 static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
        __releases(&dev->power.lock) __acquires(&dev->power.lock)
 {
-       int retval, idx;
        bool use_links = dev->power.links_count > 0;
+       bool get = false;
+       int retval, idx;
+       bool put;
 
        if (dev->power.irq_safe) {
                spin_unlock(&dev->power.lock);
+       } else if (!use_links) {
+               spin_unlock_irq(&dev->power.lock);
        } else {
+               get = dev->power.runtime_status == RPM_RESUMING;
+
                spin_unlock_irq(&dev->power.lock);
 
-               /*
-                * Resume suppliers if necessary.
-                *
-                * The device's runtime PM status cannot change until this
-                * routine returns, so it is safe to read the status outside of
-                * the lock.
-                */
-               if (use_links && dev->power.runtime_status == RPM_RESUMING) {
+               /* Resume suppliers if necessary. */
+               if (get) {
                        idx = device_links_read_lock();
 
                        retval = rpm_get_suppliers(dev);
@@ -355,24 +355,36 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
 
        if (dev->power.irq_safe) {
                spin_lock(&dev->power.lock);
-       } else {
-               /*
-                * If the device is suspending and the callback has returned
-                * success, drop the usage counters of the suppliers that have
-                * been reference counted on its resume.
-                *
-                * Do that if resume fails too.
-                */
-               if (use_links
-                   && ((dev->power.runtime_status == RPM_SUSPENDING && !retval)
-                   || (dev->power.runtime_status == RPM_RESUMING && retval))) {
-                       idx = device_links_read_lock();
+               return retval;
+       }
 
- fail:
-                       rpm_put_suppliers(dev);
+       spin_lock_irq(&dev->power.lock);
 
-                       device_links_read_unlock(idx);
-               }
+       if (!use_links)
+               return retval;
+
+       /*
+        * If the device is suspending and the callback has returned success,
+        * drop the usage counters of the suppliers that have been reference
+        * counted on its resume.
+        *
+        * Do that if the resume fails too.
+        */
+       put = dev->power.runtime_status == RPM_SUSPENDING && !retval;
+       if (put)
+               __update_runtime_status(dev, RPM_SUSPENDED);
+       else
+               put = get && retval;
+
+       if (put) {
+               spin_unlock_irq(&dev->power.lock);
+
+               idx = device_links_read_lock();
+
+fail:
+               rpm_put_suppliers(dev);
+
+               device_links_read_unlock(idx);
 
                spin_lock_irq(&dev->power.lock);
        }
index 63f5498..5ac1881 100644 (file)
@@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
 {
        struct rsxx_cardinfo *card = file_inode(fp)->i_private;
        char *buf;
-       ssize_t st;
+       int st;
 
        buf = kzalloc(cnt, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
        st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
-       if (!st)
-               st = copy_to_user(ubuf, buf, cnt);
+       if (!st) {
+               if (copy_to_user(ubuf, buf, cnt))
+                       st = -EFAULT;
+       }
        kfree(buf);
        if (st)
                return st;
index 4861669..6147977 100644 (file)
@@ -11,7 +11,6 @@
 #ifndef __RSXX_PRIV_H__
 #define __RSXX_PRIV_H__
 
-#include <linux/version.h>
 #include <linux/semaphore.h>
 
 #include <linux/fs.h>
index 19e23fc..ddaeceb 100644 (file)
@@ -278,8 +278,6 @@ static void tpm_devs_release(struct device *dev)
 {
        struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
 
-       dump_stack();
-
        /* release the master device reference */
        put_device(&chip->dev);
 }
index 431919d..a2e0395 100644 (file)
@@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
        const char *desc = "attempting to generate an interrupt";
        u32 cap2;
        cap_t cap;
+       int ret;
 
+       /* TPM 2.0 */
        if (chip->flags & TPM_CHIP_FLAG_TPM2)
                return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-       else
-               return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc,
-                                 0);
+
+       /* TPM 1.2 */
+       ret = request_locality(chip, 0);
+       if (ret < 0)
+               return ret;
+
+       ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+
+       release_locality(chip, 0);
+
+       return ret;
 }
 
 /* Register the IRQ and issue a command that will cause an interrupt. If an
@@ -1019,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
        init_waitqueue_head(&priv->read_queue);
        init_waitqueue_head(&priv->int_queue);
        if (irq != -1) {
-               /* Before doing irq testing issue a command to the TPM in polling mode
+               /*
+                * Before doing irq testing issue a command to the TPM in polling mode
                 * to make sure it works. May as well use that command to set the
                 * proper timeouts for the driver.
                 */
-               if (tpm_get_timeouts(chip)) {
+
+               rc = request_locality(chip, 0);
+               if (rc < 0)
+                       goto out_err;
+
+               rc = tpm_get_timeouts(chip);
+
+               release_locality(chip, 0);
+
+               if (rc) {
                        dev_err(dev, "Could not get TPM timeouts and durations\n");
                        rc = -ENODEV;
                        goto out_err;
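
Both hunks apply the same rule: a TPM command issued outside the normal transmit path must be bracketed by a locality claim, and the locality must be released on every exit path. A sketch of the bracketing, assuming the tpm_tis_core.c internals (request_locality() and release_locality() are file-local helpers there; do_tpm_cmd() is illustrative):

    static int do_tpm_cmd(struct tpm_chip *chip);  /* illustrative */

    static int run_with_locality(struct tpm_chip *chip)
    {
            int rc;

            rc = request_locality(chip, 0);  /* claim locality 0 first */
            if (rc < 0)
                    return rc;

            rc = do_tpm_cmd(chip);           /* e.g. tpm_get_timeouts() */

            release_locality(chip, 0);       /* drop it, success or not */
            return rc;
    }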
index 8155c54..36a741d 100644 (file)
@@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
  */
 bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 {
+#if defined(CONFIG_AMD_PMC)
        if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
                if (adev->flags & AMD_IS_APU)
                        return true;
        }
-
+#endif
        return false;
 }
index 0a25fec..43059ea 100644 (file)
@@ -357,7 +357,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
        while (size) {
                uint32_t value;
 
-               value = RREG32_PCIE(*pos >> 2);
+               value = RREG32_PCIE(*pos);
                r = put_user(value, (uint32_t *)buf);
                if (r) {
                        pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -424,7 +424,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
                        return r;
                }
 
-               WREG32_PCIE(*pos >> 2, value);
+               WREG32_PCIE(*pos, value);
 
                result += 4;
                buf += 4;
index 3c37cf1..64beb33 100644 (file)
@@ -173,8 +173,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
                switch (adev->asic_type) {
                case CHIP_VEGA20:
                case CHIP_ARCTURUS:
-               case CHIP_SIENNA_CICHLID:
-               case CHIP_NAVY_FLOUNDER:
                        /* enable runpm if runpm=1 */
                        if (amdgpu_runtime_pm > 0)
                                adev->runpm = true;
index 160fa5f..c625c5d 100644 (file)
@@ -558,7 +558,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev)
 {
        if ((pdev->device == 0x731E &&
            (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
-           (pdev->device == 0x7340 && pdev->revision == 0xC9))
+           (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
+           (pdev->device == 0x7360 && pdev->revision == 0xC7))
                return true;
        return false;
 }
@@ -634,7 +635,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
                    !amdgpu_sriov_vf(adev))
                        amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-               amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+               if (!nv_is_headless_sku(adev->pdev))
+                       amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
                if (!amdgpu_sriov_vf(adev))
                        amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
                break;
index 5159399..5750818 100644 (file)
@@ -530,7 +530,7 @@ bool dm_helpers_dp_write_dsc_enable(
 {
        uint8_t enable_dsc = enable ? 1 : 0;
        struct amdgpu_dm_connector *aconnector;
-       uint8_t ret;
+       uint8_t ret = 0;
 
        if (!stream)
                return false;
index 45564a7..9f0d03a 100644 (file)
@@ -1322,7 +1322,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
                                                       CMN2ASIC_MAPPING_WORKLOAD,
                                                       profile_mode);
        if (workload_type < 0) {
-               dev_err(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
                return -EINVAL;
        }
 
index 9058546..a621185 100644 (file)
@@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin");
 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000
 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE
 
+#define mmTHM_BACO_CNTL_ARCT                   0xA7
+#define mmTHM_BACO_CNTL_ARCT_BASE_IDX          0
+
 static int link_width[] = {0, 1, 2, 4, 8, 12, 16};
 static int link_speed[] = {25, 50, 80, 160};
 
@@ -1532,9 +1535,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
                        break;
                default:
                        if (!ras || !ras->supported) {
-                               data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
-                               data |= 0x80000000;
-                               WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+                               if (adev->asic_type == CHIP_ARCTURUS) {
+                                       data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);
+                                       data |= 0x80000000;
+                                       WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data);
+                               } else {
+                                       data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+                                       data |= 0x80000000;
+                                       WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+                               }
 
                                ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL);
                        } else {
index 093b011..7ddbaec 100644 (file)
@@ -810,7 +810,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
                                                       CMN2ASIC_MAPPING_WORKLOAD,
                                                       profile_mode);
        if (workload_type < 0) {
-               dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
                                        profile_mode);
                return -EINVAL;
        }
@@ -1685,9 +1685,9 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
        uint32_t feature_mask[2];
        int ret = 0;
 
-       if (adev->pm.fw_version >= 0x43f1700)
+       if (adev->pm.fw_version >= 0x43f1700 && !en)
                ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
-                                                     en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+                                                     RLC_STATUS_OFF, NULL);
 
        bitmap_zero(feature->enabled, feature->feature_num);
        bitmap_zero(feature->supported, feature->feature_num);
index 5faa509..5493388 100644 (file)
@@ -844,7 +844,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
                 * TODO: If some case need switch to powersave/default power mode
                 * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
                 */
-               dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
                return -EINVAL;
        }
 
index 69da601..e771bd5 100644 (file)
@@ -261,6 +261,9 @@ gk104_fifo_pbdma = {
 struct nvkm_engine *
 gk104_fifo_id_engine(struct nvkm_fifo *base, int engi)
 {
+       if (engi == GK104_FIFO_ENGN_SW)
+               return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0);
+
        return gk104_fifo(base)->engine[engi].engine;
 }
 
index 1c4961e..bb0ee5c 100644 (file)
@@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain,
        bool ret = true;
        u64 *pte;
 
+       pte = (void *)get_zeroed_page(gfp);
+       if (!pte)
+               return false;
+
        spin_lock_irqsave(&domain->lock, flags);
 
        if (address <= PM_LEVEL_SIZE(domain->iop.mode))
@@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain,
        if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
                goto out;
 
-       pte = (void *)get_zeroed_page(gfp);
-       if (!pte)
-               goto out;
-
        *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
 
        domain->iop.root  = pte;
@@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain,
         */
        amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
+       pte = NULL;
        ret = true;
 
 out:
        spin_unlock_irqrestore(&domain->lock, flags);
+       free_page((unsigned long)pte);
 
        return ret;
 }
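
The reordering above exists because a GFP_KERNEL allocation may sleep and therefore must not happen under spin_lock_irqsave(). The fix allocates first, then NULLs the pointer on the path that consumes the page so that a single unconditional free_page() after the unlock covers both outcomes (free_page(0) is a no-op). A sketch of the pattern; struct table and install_pte() are illustrative:

    #include <linux/gfp.h>
    #include <linux/spinlock.h>
    #include <linux/types.h>

    struct table { spinlock_t lock; };

    static bool install_pte(struct table *t, u64 *pte);  /* illustrative */

    static bool grow_table(struct table *t, gfp_t gfp)
    {
            unsigned long flags;
            bool ret = false;
            u64 *pte;

            pte = (void *)get_zeroed_page(gfp);  /* may sleep: no lock yet */
            if (!pte)
                    return false;

            spin_lock_irqsave(&t->lock, flags);
            if (install_pte(t, pte)) {           /* table took ownership */
                    pte = NULL;
                    ret = true;
            }
            spin_unlock_irqrestore(&t->lock, flags);

            free_page((unsigned long)pte);       /* only frees if unused */
            return ret;
    }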
index 9ab6ee2..af765c8 100644 (file)
@@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
        domain->ops->flush_iotlb_all(domain);
 }
 
+static bool dev_is_untrusted(struct device *dev)
+{
+       return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 
        init_iova_domain(iovad, 1UL << order, base_pfn);
 
-       if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
-                       DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+       if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
+           !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) &&
+           attr) {
                if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
                                          iommu_dma_entry_dtor))
                        pr_warn("iova flush queue initialization failed\n");
@@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
                                iova_align(iovad, size), dir, attrs);
 }
 
-static bool dev_is_untrusted(struct device *dev)
-{
-       return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
                size_t size, int prot, u64 dma_mask)
 {
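
The helper is only moved above its new call site; the substance is the extra condition in iommu_dma_init_domain(): a deferred-flush IOTLB queue batches invalidations, which would leave windows where an untrusted (external-facing) PCI device can still reach already-unmapped memory, so the queue is enabled only when the device is absent or trusted. Restated as one predicate (a sketch, not a kernel function):

    #include <linux/pci.h>

    static bool may_use_flush_queue(struct device *dev)
    {
            /* no device, or a device we trust not to exploit stale
             * IOTLB entries left behind by deferred invalidation */
            return !dev || !(dev_is_pci(dev) && to_pci_dev(dev)->untrusted);
    }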
index 97dfcff..444c0be 100644 (file)
@@ -30,8 +30,8 @@
 #define VCMD_VRSP_IP                   0x1
 #define VCMD_VRSP_SC(e)                        (((e) >> 1) & 0x3)
 #define VCMD_VRSP_SC_SUCCESS           0
-#define VCMD_VRSP_SC_NO_PASID_AVAIL    1
-#define VCMD_VRSP_SC_INVALID_PASID     1
+#define VCMD_VRSP_SC_NO_PASID_AVAIL    2
+#define VCMD_VRSP_SC_INVALID_PASID     2
 #define VCMD_VRSP_RESULT_PASID(e)      (((e) >> 8) & 0xfffff)
 #define VCMD_CMD_OPERAND(e)            ((e) << 8)
 /*
index 4a3f095..97eb62f 100644 (file)
@@ -798,10 +798,70 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
        return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
 }
 
+static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
+{
+       struct platform_device *pdev;
+       struct tegra_mc *mc;
+
+       pdev = of_find_device_by_node(np);
+       if (!pdev)
+               return NULL;
+
+       mc = platform_get_drvdata(pdev);
+       if (!mc)
+               return NULL;
+
+       return mc->smmu;
+}
+
+static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
+                               struct of_phandle_args *args)
+{
+       const struct iommu_ops *ops = smmu->iommu.ops;
+       int err;
+
+       err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops);
+       if (err < 0) {
+               dev_err(dev, "failed to initialize fwspec: %d\n", err);
+               return err;
+       }
+
+       err = ops->of_xlate(dev, args);
+       if (err < 0) {
+               dev_err(dev, "failed to parse SW group ID: %d\n", err);
+               iommu_fwspec_free(dev);
+               return err;
+       }
+
+       return 0;
+}
+
 static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
 {
-       struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
+       struct device_node *np = dev->of_node;
+       struct tegra_smmu *smmu = NULL;
+       struct of_phandle_args args;
+       unsigned int index = 0;
+       int err;
+
+       while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+                                         &args) == 0) {
+               smmu = tegra_smmu_find(args.np);
+               if (smmu) {
+                       err = tegra_smmu_configure(smmu, dev, &args);
+                       of_node_put(args.np);
+
+                       if (err < 0)
+                               return ERR_PTR(err);
+
+                       break;
+               }
+
+               of_node_put(args.np);
+               index++;
+       }
+
+       smmu = dev_iommu_priv_get(dev);
        if (!smmu)
                return ERR_PTR(-ENODEV);
 
@@ -1028,6 +1088,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
        if (!smmu)
                return ERR_PTR(-ENOMEM);
 
+       /*
+        * This is a bit of a hack. Ideally we'd want to simply return this
+        * value. However the IOMMU registration process will attempt to add
+        * all devices to the IOMMU when bus_set_iommu() is called. In order
+        * not to rely on global variables to track the IOMMU instance, we
+        * set it here so that it can be looked up from the .probe_device()
+        * callback via the IOMMU device's .drvdata field.
+        */
+       mc->smmu = smmu;
+
        size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
 
        smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
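
The probe path above walks the "iommus" phandle list by hand to set up the fwspec before falling back to dev_iommu_priv_get(). The recurring detail is reference counting: of_parse_phandle_with_args() takes a reference on args.np, so every way out of the loop body has to drop it with of_node_put(). A sketch of the walk; handle() is illustrative:

    #include <linux/of.h>

    static int handle(struct of_phandle_args *args);  /* illustrative */

    static int walk_iommus(struct device_node *np)
    {
            struct of_phandle_args args;
            int index = 0;
            int err = 0;

            while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
                                               index, &args)) {
                    err = handle(&args);
                    of_node_put(args.np);  /* drop the ref on every path */
                    if (err)
                            break;
                    index++;
            }
            return err;
    }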
index fce4cbf..50f3e67 100644 (file)
@@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
 {
        sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+       if (s >= c->start)
+               s -= c->start;
+       else
+               s = 0;
        if (likely(c->sectors_per_block_bits >= 0))
                s >>= c->sectors_per_block_bits;
        else
index fb41b4f..66f4c63 100644 (file)
@@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
 static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
                           unsigned *offset, struct dm_buffer **buf)
 {
-       u64 position, block;
+       u64 position, block, rem;
        u8 *res;
 
        position = (index + rsb) * v->fec->roots;
-       block = position >> v->data_dev_block_bits;
-       *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+       block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem);
+       *offset = (unsigned)rem;
 
-       res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+       res = dm_bufio_read(v->fec->bufio, block, buf);
        if (IS_ERR(res)) {
                DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
                      v->data_dev->name, (unsigned long long)rsb,
-                     (unsigned long long)(v->fec->start + block),
-                     PTR_ERR(res));
+                     (unsigned long long)block, PTR_ERR(res));
                *buf = NULL;
        }
 
@@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
 
                /* read the next block when we run out of parity bytes */
                offset += v->fec->roots;
-               if (offset >= 1 << v->data_dev_block_bits) {
+               if (offset >= v->fec->roots << SECTOR_SHIFT) {
                        dm_bufio_release(buf);
 
                        par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
@@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v)
 {
        struct dm_verity_fec *f = v->fec;
        struct dm_target *ti = v->ti;
-       u64 hash_blocks;
+       u64 hash_blocks, fec_blocks;
        int ret;
 
        if (!verity_fec_is_enabled(v)) {
@@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v)
        }
 
        f->bufio = dm_bufio_client_create(f->dev->bdev,
-                                         1 << v->data_dev_block_bits,
+                                         f->roots << SECTOR_SHIFT,
                                          1, 0, NULL, NULL);
        if (IS_ERR(f->bufio)) {
                ti->error = "Cannot initialize FEC bufio client";
                return PTR_ERR(f->bufio);
        }
 
-       if (dm_bufio_get_device_size(f->bufio) <
-           ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+       dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT));
+
+       fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT);
+       if (dm_bufio_get_device_size(f->bufio) < fec_blocks) {
                ti->error = "FEC device is too small";
                return -E2BIG;
        }
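
The arithmetic change is the heart of this fix: parity positions are now split with div64_u64_rem() against a bufio block size of roots * 512 bytes (roots << SECTOR_SHIFT) instead of shifting by the data block size, so RS root counts that do not divide the block size no longer misalign the reads. A userspace model of the computation, with plain / and % standing in for div64_u64_rem():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t roots = 20, rsb = 12345, index = 7;
            uint64_t position = (index + rsb) * roots;  /* byte position */
            uint64_t bsz = roots << 9;                  /* roots * 512 */
            uint64_t block = position / bsz;            /* bufio block */
            unsigned offset = (unsigned)(position % bsz);

            printf("block %llu offset %u\n",
                   (unsigned long long)block, offset);
            return 0;
    }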
index e5c73f8..6afb5ca 100644 (file)
@@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
                return 0;
 
        gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
-       if (nr_mops != 0)
+       if (nr_mops != 0) {
                ret = gnttab_map_refs(queue->tx_map_ops,
                                      NULL,
                                      queue->pages_to_map,
                                      nr_mops);
+               if (ret) {
+                       unsigned int i;
+
+                       netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
+                                  nr_mops, ret);
+                       for (i = 0; i < nr_mops; ++i)
+                               WARN_ON_ONCE(queue->tx_map_ops[i].status ==
+                                            GNTST_okay);
+               }
+       }
 
        work_done = xenvif_tx_submit(queue);
 
index 5dfd806..604ab0e 100644 (file)
@@ -630,7 +630,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
        opts->queue_size = NVMF_DEF_QUEUE_SIZE;
        opts->nr_io_queues = num_online_cpus();
        opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
-       opts->kato = NVME_DEFAULT_KATO;
+       opts->kato = 0;
        opts->duplicate_connect = false;
        opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
        opts->hdr_digest = false;
@@ -893,6 +893,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                opts->nr_write_queues = 0;
                opts->nr_poll_queues = 0;
                opts->duplicate_connect = true;
+       } else {
+               if (!opts->kato)
+                       opts->kato = NVME_DEFAULT_KATO;
        }
        if (ctrl_loss_tmo < 0) {
                opts->max_reconnects = -1;
index 8f9e969..0a586d7 100644 (file)
@@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
        if (IS_ERR(hwmon)) {
                dev_warn(dev, "Failed to instantiate hwmon device\n");
                kfree(data);
+               return PTR_ERR(hwmon);
        }
        ctrl->hwmon_device = hwmon;
        return 0;
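
The one-line fix above matters because the registration call (not shown in the hunk) hands back an ERR_PTR on failure; without the return, that encoded errno was stored in ctrl->hwmon_device and later treated as a live pointer. The general unwind shape (a sketch; struct ctx, struct thing and register_thing() are illustrative):

    #include <linux/err.h>
    #include <linux/slab.h>

    struct thing;
    struct ctx { struct thing *thing; void *scratch; };

    static struct thing *register_thing(struct ctx *c);  /* illustrative */

    static int init_thing(struct ctx *c)
    {
            struct thing *t = register_thing(c);

            if (IS_ERR(t)) {
                    kfree(c->scratch);   /* undo the earlier allocation */
                    return PTR_ERR(t);   /* the return this fix adds */
            }
            c->thing = t;
            return 0;
    }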
index 38b0d69..17ab332 100644 (file)
@@ -3234,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x126f, 0x2263),   /* Silicon Motion unidentified */
                .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
-               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+                               NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c58, 0x0023),   /* WDC SN200 adapter */
@@ -3248,6 +3249,9 @@ static const struct pci_device_id nvme_id_table[] = {
                                NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1987, 0x5016),   /* Phison E16 */
                .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+       { PCI_DEVICE(0x1b4b, 0x1092),   /* Lexar 256 GB SSD */
+               .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+                               NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1d1d, 0x1f1f),   /* LighNVM qemu device */
                .driver_data = NVME_QUIRK_LIGHTNVM, },
        { PCI_DEVICE(0x1d1d, 0x2807),   /* CNEX WL */
@@ -3265,6 +3269,8 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x2646, 0x2262),   /* KINGSTON SKC2000 NVMe SSD */
+               .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
        { PCI_DEVICE(0x2646, 0x2263),   /* KINGSTON A2000 NVMe SSD  */
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
index bc6a774..fe6b8aa 100644 (file)
@@ -313,27 +313,40 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
        nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
 }
 
-static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
-                                     struct nvmet_subsys *subsys)
+static u16 nvmet_set_model_number(struct nvmet_subsys *subsys)
 {
-       const char *model = NVMET_DEFAULT_CTRL_MODEL;
-       struct nvmet_subsys_model *subsys_model;
+       u16 status = 0;
+
+       mutex_lock(&subsys->lock);
+       if (!subsys->model_number) {
+               subsys->model_number =
+                       kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
+               if (!subsys->model_number)
+                       status = NVME_SC_INTERNAL;
+       }
+       mutex_unlock(&subsys->lock);
 
-       rcu_read_lock();
-       subsys_model = rcu_dereference(subsys->model);
-       if (subsys_model)
-               model = subsys_model->number;
-       memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
-       rcu_read_unlock();
+       return status;
 }
 
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
+       struct nvmet_subsys *subsys = ctrl->subsys;
        struct nvme_id_ctrl *id;
        u32 cmd_capsule_size;
        u16 status = 0;
 
+       /*
+        * If there is no model number yet, set it now.  It will then remain
+        * stable for the lifetime of the subsystem.
+        */
+       if (!subsys->model_number) {
+               status = nvmet_set_model_number(subsys);
+               if (status)
+                       goto out;
+       }
+
        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id) {
                status = NVME_SC_INTERNAL;
@@ -347,7 +360,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
        memset(id->sn, ' ', sizeof(id->sn));
        bin2hex(id->sn, &ctrl->subsys->serial,
                min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
-       nvmet_id_set_model_number(id, ctrl->subsys);
+       memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
+                      strlen(subsys->model_number), ' ');
        memcpy_and_pad(id->fr, sizeof(id->fr),
                       UTS_RELEASE, strlen(UTS_RELEASE), ' ');
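
The model number becomes a plain set-once string: the fast path checks the pointer without the lock, and the slow path re-checks under subsys->lock so two racing Identify commands cannot both allocate; once assigned it is never replaced, which is what lets readers use it lock-free. A sketch of the set-once idiom with illustrative types:

    #include <linux/mutex.h>
    #include <linux/slab.h>

    struct subsys {
            struct mutex lock;
            char *model;
    };

    static int ensure_model(struct subsys *s, const char *def)
    {
            int ret = 0;

            if (s->model)           /* already set: stable, no lock */
                    return 0;

            mutex_lock(&s->lock);
            if (!s->model) {        /* re-check: we may have raced */
                    s->model = kstrdup(def, GFP_KERNEL);
                    if (!s->model)
                            ret = -ENOMEM;
            }
            mutex_unlock(&s->lock);
            return ret;
    }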
 
index 635a7cb..e5dbd19 100644 (file)
@@ -1118,16 +1118,12 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
                                            char *page)
 {
        struct nvmet_subsys *subsys = to_subsys(item);
-       struct nvmet_subsys_model *subsys_model;
-       char *model = NVMET_DEFAULT_CTRL_MODEL;
        int ret;
 
-       rcu_read_lock();
-       subsys_model = rcu_dereference(subsys->model);
-       if (subsys_model)
-               model = subsys_model->number;
-       ret = snprintf(page, PAGE_SIZE, "%s\n", model);
-       rcu_read_unlock();
+       mutex_lock(&subsys->lock);
+       ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ?
+                       subsys->model_number : NVMET_DEFAULT_CTRL_MODEL);
+       mutex_unlock(&subsys->lock);
 
        return ret;
 }
@@ -1138,14 +1134,17 @@ static bool nvmet_is_ascii(const char c)
        return c >= 0x20 && c <= 0x7e;
 }
 
-static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
-                                            const char *page, size_t count)
+static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
+               const char *page, size_t count)
 {
-       struct nvmet_subsys *subsys = to_subsys(item);
-       struct nvmet_subsys_model *new_model;
-       char *new_model_number;
        int pos = 0, len;
 
+       if (subsys->model_number) {
+               pr_err("Can't set model number. %s is already assigned\n",
+                      subsys->model_number);
+               return -EINVAL;
+       }
+
        len = strcspn(page, "\n");
        if (!len)
                return -EINVAL;
@@ -1155,28 +1154,25 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
                        return -EINVAL;
        }
 
-       new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
-       if (!new_model_number)
+       subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL);
+       if (!subsys->model_number)
                return -ENOMEM;
+       return count;
+}
 
-       new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
-       if (!new_model) {
-               kfree(new_model_number);
-               return -ENOMEM;
-       }
-       memcpy(new_model->number, new_model_number, len);
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+                                            const char *page, size_t count)
+{
+       struct nvmet_subsys *subsys = to_subsys(item);
+       ssize_t ret;
 
        down_write(&nvmet_config_sem);
        mutex_lock(&subsys->lock);
-       new_model = rcu_replace_pointer(subsys->model, new_model,
-                                       mutex_is_locked(&subsys->lock));
+       ret = nvmet_subsys_attr_model_store_locked(subsys, page, count);
        mutex_unlock(&subsys->lock);
        up_write(&nvmet_config_sem);
 
-       kfree_rcu(new_model, rcuhead);
-       kfree(new_model_number);
-
-       return count;
+       return ret;
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_model);
 
index 67bbf0e..be6fcda 100644 (file)
@@ -1532,7 +1532,7 @@ static void nvmet_subsys_free(struct kref *ref)
        nvmet_passthru_subsys_free(subsys);
 
        kfree(subsys->subsysnqn);
-       kfree_rcu(subsys->model, rcuhead);
+       kfree(subsys->model_number);
        kfree(subsys);
 }
 
index cdfa537..4b84edb 100644 (file)
@@ -208,11 +208,6 @@ struct nvmet_ctrl {
        bool                    pi_support;
 };
 
-struct nvmet_subsys_model {
-       struct rcu_head         rcuhead;
-       char                    number[];
-};
-
 struct nvmet_subsys {
        enum nvme_subsys_type   type;
 
@@ -242,7 +237,7 @@ struct nvmet_subsys {
        struct config_group     namespaces_group;
        struct config_group     allowed_hosts_group;
 
-       struct nvmet_subsys_model       __rcu *model;
+       char                    *model_number;
 
 #ifdef CONFIG_NVME_TARGET_PASSTHRU
        struct nvme_ctrl        *passthru_ctrl;
index 20b4325..8242e8c 100644 (file)
@@ -45,7 +45,7 @@ config IDLE_INJECT
          on a per CPU basis.
 
 config DTPM
-       bool "Power capping for Dynamic Thermal Power Management"
+       bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
        help
          This enables support for the power capping for the dynamic
          thermal power management userspace engine.
index 5a51cd3..c2185ec 100644 (file)
@@ -207,6 +207,9 @@ int dtpm_release_zone(struct powercap_zone *pcz)
        if (dtpm->ops)
                dtpm->ops->release(dtpm);
 
+       if (root == dtpm)
+               root = NULL;
+
        kfree(dtpm);
 
        return 0;
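
The three added lines fix a dangling pointer: dtpm keeps the hierarchy root in a file-scope variable, and releasing the root zone without clearing it would leave later registrations chaining children under freed memory. The shape of the fix (a sketch with illustrative types):

    #include <linux/slab.h>

    struct node;

    static struct node *root;  /* file-scope cache of the hierarchy root */

    static void release_node(struct node *n)
    {
            if (root == n)     /* the check this fix adds */
                    root = NULL;
            kfree(n);
    }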
index 7ad11e4..04633e5 100644 (file)
@@ -3430,125 +3430,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
 
        switch(param) {
        case ISCSI_PARAM_FAST_ABORT:
-               len = sprintf(buf, "%d\n", session->fast_abort);
+               len = sysfs_emit(buf, "%d\n", session->fast_abort);
                break;
        case ISCSI_PARAM_ABORT_TMO:
-               len = sprintf(buf, "%d\n", session->abort_timeout);
+               len = sysfs_emit(buf, "%d\n", session->abort_timeout);
                break;
        case ISCSI_PARAM_LU_RESET_TMO:
-               len = sprintf(buf, "%d\n", session->lu_reset_timeout);
+               len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout);
                break;
        case ISCSI_PARAM_TGT_RESET_TMO:
-               len = sprintf(buf, "%d\n", session->tgt_reset_timeout);
+               len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout);
                break;
        case ISCSI_PARAM_INITIAL_R2T_EN:
-               len = sprintf(buf, "%d\n", session->initial_r2t_en);
+               len = sysfs_emit(buf, "%d\n", session->initial_r2t_en);
                break;
        case ISCSI_PARAM_MAX_R2T:
-               len = sprintf(buf, "%hu\n", session->max_r2t);
+               len = sysfs_emit(buf, "%hu\n", session->max_r2t);
                break;
        case ISCSI_PARAM_IMM_DATA_EN:
-               len = sprintf(buf, "%d\n", session->imm_data_en);
+               len = sysfs_emit(buf, "%d\n", session->imm_data_en);
                break;
        case ISCSI_PARAM_FIRST_BURST:
-               len = sprintf(buf, "%u\n", session->first_burst);
+               len = sysfs_emit(buf, "%u\n", session->first_burst);
                break;
        case ISCSI_PARAM_MAX_BURST:
-               len = sprintf(buf, "%u\n", session->max_burst);
+               len = sysfs_emit(buf, "%u\n", session->max_burst);
                break;
        case ISCSI_PARAM_PDU_INORDER_EN:
-               len = sprintf(buf, "%d\n", session->pdu_inorder_en);
+               len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en);
                break;
        case ISCSI_PARAM_DATASEQ_INORDER_EN:
-               len = sprintf(buf, "%d\n", session->dataseq_inorder_en);
+               len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en);
                break;
        case ISCSI_PARAM_DEF_TASKMGMT_TMO:
-               len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo);
+               len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo);
                break;
        case ISCSI_PARAM_ERL:
-               len = sprintf(buf, "%d\n", session->erl);
+               len = sysfs_emit(buf, "%d\n", session->erl);
                break;
        case ISCSI_PARAM_TARGET_NAME:
-               len = sprintf(buf, "%s\n", session->targetname);
+               len = sysfs_emit(buf, "%s\n", session->targetname);
                break;
        case ISCSI_PARAM_TARGET_ALIAS:
-               len = sprintf(buf, "%s\n", session->targetalias);
+               len = sysfs_emit(buf, "%s\n", session->targetalias);
                break;
        case ISCSI_PARAM_TPGT:
-               len = sprintf(buf, "%d\n", session->tpgt);
+               len = sysfs_emit(buf, "%d\n", session->tpgt);
                break;
        case ISCSI_PARAM_USERNAME:
-               len = sprintf(buf, "%s\n", session->username);
+               len = sysfs_emit(buf, "%s\n", session->username);
                break;
        case ISCSI_PARAM_USERNAME_IN:
-               len = sprintf(buf, "%s\n", session->username_in);
+               len = sysfs_emit(buf, "%s\n", session->username_in);
                break;
        case ISCSI_PARAM_PASSWORD:
-               len = sprintf(buf, "%s\n", session->password);
+               len = sysfs_emit(buf, "%s\n", session->password);
                break;
        case ISCSI_PARAM_PASSWORD_IN:
-               len = sprintf(buf, "%s\n", session->password_in);
+               len = sysfs_emit(buf, "%s\n", session->password_in);
                break;
        case ISCSI_PARAM_IFACE_NAME:
-               len = sprintf(buf, "%s\n", session->ifacename);
+               len = sysfs_emit(buf, "%s\n", session->ifacename);
                break;
        case ISCSI_PARAM_INITIATOR_NAME:
-               len = sprintf(buf, "%s\n", session->initiatorname);
+               len = sysfs_emit(buf, "%s\n", session->initiatorname);
                break;
        case ISCSI_PARAM_BOOT_ROOT:
-               len = sprintf(buf, "%s\n", session->boot_root);
+               len = sysfs_emit(buf, "%s\n", session->boot_root);
                break;
        case ISCSI_PARAM_BOOT_NIC:
-               len = sprintf(buf, "%s\n", session->boot_nic);
+               len = sysfs_emit(buf, "%s\n", session->boot_nic);
                break;
        case ISCSI_PARAM_BOOT_TARGET:
-               len = sprintf(buf, "%s\n", session->boot_target);
+               len = sysfs_emit(buf, "%s\n", session->boot_target);
                break;
        case ISCSI_PARAM_AUTO_SND_TGT_DISABLE:
-               len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable);
+               len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable);
                break;
        case ISCSI_PARAM_DISCOVERY_SESS:
-               len = sprintf(buf, "%u\n", session->discovery_sess);
+               len = sysfs_emit(buf, "%u\n", session->discovery_sess);
                break;
        case ISCSI_PARAM_PORTAL_TYPE:
-               len = sprintf(buf, "%s\n", session->portal_type);
+               len = sysfs_emit(buf, "%s\n", session->portal_type);
                break;
        case ISCSI_PARAM_CHAP_AUTH_EN:
-               len = sprintf(buf, "%u\n", session->chap_auth_en);
+               len = sysfs_emit(buf, "%u\n", session->chap_auth_en);
                break;
        case ISCSI_PARAM_DISCOVERY_LOGOUT_EN:
-               len = sprintf(buf, "%u\n", session->discovery_logout_en);
+               len = sysfs_emit(buf, "%u\n", session->discovery_logout_en);
                break;
        case ISCSI_PARAM_BIDI_CHAP_EN:
-               len = sprintf(buf, "%u\n", session->bidi_chap_en);
+               len = sysfs_emit(buf, "%u\n", session->bidi_chap_en);
                break;
        case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL:
-               len = sprintf(buf, "%u\n", session->discovery_auth_optional);
+               len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional);
                break;
        case ISCSI_PARAM_DEF_TIME2WAIT:
-               len = sprintf(buf, "%d\n", session->time2wait);
+               len = sysfs_emit(buf, "%d\n", session->time2wait);
                break;
        case ISCSI_PARAM_DEF_TIME2RETAIN:
-               len = sprintf(buf, "%d\n", session->time2retain);
+               len = sysfs_emit(buf, "%d\n", session->time2retain);
                break;
        case ISCSI_PARAM_TSID:
-               len = sprintf(buf, "%u\n", session->tsid);
+               len = sysfs_emit(buf, "%u\n", session->tsid);
                break;
        case ISCSI_PARAM_ISID:
-               len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
+               len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n",
                              session->isid[0], session->isid[1],
                              session->isid[2], session->isid[3],
                              session->isid[4], session->isid[5]);
                break;
        case ISCSI_PARAM_DISCOVERY_PARENT_IDX:
-               len = sprintf(buf, "%u\n", session->discovery_parent_idx);
+               len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx);
                break;
        case ISCSI_PARAM_DISCOVERY_PARENT_TYPE:
                if (session->discovery_parent_type)
-                       len = sprintf(buf, "%s\n",
+                       len = sysfs_emit(buf, "%s\n",
                                      session->discovery_parent_type);
                else
-                       len = sprintf(buf, "\n");
+                       len = sysfs_emit(buf, "\n");
                break;
        default:
                return -ENOSYS;
@@ -3580,16 +3580,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr,
        case ISCSI_PARAM_CONN_ADDRESS:
        case ISCSI_HOST_PARAM_IPADDRESS:
                if (sin)
-                       len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr);
+                       len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr);
                else
-                       len = sprintf(buf, "%pI6\n", &sin6->sin6_addr);
+                       len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr);
                break;
        case ISCSI_PARAM_CONN_PORT:
        case ISCSI_PARAM_LOCAL_PORT:
                if (sin)
-                       len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port));
+                       len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port));
                else
-                       len = sprintf(buf, "%hu\n",
+                       len = sysfs_emit(buf, "%hu\n",
                                      be16_to_cpu(sin6->sin6_port));
                break;
        default:
@@ -3608,88 +3608,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
 
        switch(param) {
        case ISCSI_PARAM_PING_TMO:
-               len = sprintf(buf, "%u\n", conn->ping_timeout);
+               len = sysfs_emit(buf, "%u\n", conn->ping_timeout);
                break;
        case ISCSI_PARAM_RECV_TMO:
-               len = sprintf(buf, "%u\n", conn->recv_timeout);
+               len = sysfs_emit(buf, "%u\n", conn->recv_timeout);
                break;
        case ISCSI_PARAM_MAX_RECV_DLENGTH:
-               len = sprintf(buf, "%u\n", conn->max_recv_dlength);
+               len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength);
                break;
        case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-               len = sprintf(buf, "%u\n", conn->max_xmit_dlength);
+               len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength);
                break;
        case ISCSI_PARAM_HDRDGST_EN:
-               len = sprintf(buf, "%d\n", conn->hdrdgst_en);
+               len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en);
                break;
        case ISCSI_PARAM_DATADGST_EN:
-               len = sprintf(buf, "%d\n", conn->datadgst_en);
+               len = sysfs_emit(buf, "%d\n", conn->datadgst_en);
                break;
        case ISCSI_PARAM_IFMARKER_EN:
-               len = sprintf(buf, "%d\n", conn->ifmarker_en);
+               len = sysfs_emit(buf, "%d\n", conn->ifmarker_en);
                break;
        case ISCSI_PARAM_OFMARKER_EN:
-               len = sprintf(buf, "%d\n", conn->ofmarker_en);
+               len = sysfs_emit(buf, "%d\n", conn->ofmarker_en);
                break;
        case ISCSI_PARAM_EXP_STATSN:
-               len = sprintf(buf, "%u\n", conn->exp_statsn);
+               len = sysfs_emit(buf, "%u\n", conn->exp_statsn);
                break;
        case ISCSI_PARAM_PERSISTENT_PORT:
-               len = sprintf(buf, "%d\n", conn->persistent_port);
+               len = sysfs_emit(buf, "%d\n", conn->persistent_port);
                break;
        case ISCSI_PARAM_PERSISTENT_ADDRESS:
-               len = sprintf(buf, "%s\n", conn->persistent_address);
+               len = sysfs_emit(buf, "%s\n", conn->persistent_address);
                break;
        case ISCSI_PARAM_STATSN:
-               len = sprintf(buf, "%u\n", conn->statsn);
+               len = sysfs_emit(buf, "%u\n", conn->statsn);
                break;
        case ISCSI_PARAM_MAX_SEGMENT_SIZE:
-               len = sprintf(buf, "%u\n", conn->max_segment_size);
+               len = sysfs_emit(buf, "%u\n", conn->max_segment_size);
                break;
        case ISCSI_PARAM_KEEPALIVE_TMO:
-               len = sprintf(buf, "%u\n", conn->keepalive_tmo);
+               len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo);
                break;
        case ISCSI_PARAM_LOCAL_PORT:
-               len = sprintf(buf, "%u\n", conn->local_port);
+               len = sysfs_emit(buf, "%u\n", conn->local_port);
                break;
        case ISCSI_PARAM_TCP_TIMESTAMP_STAT:
-               len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat);
                break;
        case ISCSI_PARAM_TCP_NAGLE_DISABLE:
-               len = sprintf(buf, "%u\n", conn->tcp_nagle_disable);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable);
                break;
        case ISCSI_PARAM_TCP_WSF_DISABLE:
-               len = sprintf(buf, "%u\n", conn->tcp_wsf_disable);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable);
                break;
        case ISCSI_PARAM_TCP_TIMER_SCALE:
-               len = sprintf(buf, "%u\n", conn->tcp_timer_scale);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale);
                break;
        case ISCSI_PARAM_TCP_TIMESTAMP_EN:
-               len = sprintf(buf, "%u\n", conn->tcp_timestamp_en);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en);
                break;
        case ISCSI_PARAM_IP_FRAGMENT_DISABLE:
-               len = sprintf(buf, "%u\n", conn->fragment_disable);
+               len = sysfs_emit(buf, "%u\n", conn->fragment_disable);
                break;
        case ISCSI_PARAM_IPV4_TOS:
-               len = sprintf(buf, "%u\n", conn->ipv4_tos);
+               len = sysfs_emit(buf, "%u\n", conn->ipv4_tos);
                break;
        case ISCSI_PARAM_IPV6_TC:
-               len = sprintf(buf, "%u\n", conn->ipv6_traffic_class);
+               len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class);
                break;
        case ISCSI_PARAM_IPV6_FLOW_LABEL:
-               len = sprintf(buf, "%u\n", conn->ipv6_flow_label);
+               len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label);
                break;
        case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6:
-               len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6);
+               len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6);
                break;
        case ISCSI_PARAM_TCP_XMIT_WSF:
-               len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf);
                break;
        case ISCSI_PARAM_TCP_RECV_WSF:
-               len = sprintf(buf, "%u\n", conn->tcp_recv_wsf);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf);
                break;
        case ISCSI_PARAM_LOCAL_IPADDR:
-               len = sprintf(buf, "%s\n", conn->local_ipaddr);
+               len = sysfs_emit(buf, "%s\n", conn->local_ipaddr);
                break;
        default:
                return -ENOSYS;
@@ -3707,13 +3707,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
        switch (param) {
        case ISCSI_HOST_PARAM_NETDEV_NAME:
-               len = sprintf(buf, "%s\n", ihost->netdev);
+               len = sysfs_emit(buf, "%s\n", ihost->netdev);
                break;
        case ISCSI_HOST_PARAM_HWADDRESS:
-               len = sprintf(buf, "%s\n", ihost->hwaddress);
+               len = sysfs_emit(buf, "%s\n", ihost->hwaddress);
                break;
        case ISCSI_HOST_PARAM_INITIATOR_NAME:
-               len = sprintf(buf, "%s\n", ihost->initiatorname);
+               len = sysfs_emit(buf, "%s\n", ihost->initiatorname);
                break;
        default:
                return -ENOSYS;
index 969d24d..91074fd 100644 (file)
@@ -132,7 +132,11 @@ show_transport_handle(struct device *dev, struct device_attribute *attr,
                      char *buf)
 {
        struct iscsi_internal *priv = dev_to_iscsi_internal(dev);
-       return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport));
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       return sysfs_emit(buf, "%llu\n",
+                 (unsigned long long)iscsi_handle(priv->iscsi_transport));
 }
 static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL);
 
@@ -142,7 +146,7 @@ show_transport_##name(struct device *dev,                           \
                      struct device_attribute *attr,char *buf)          \
 {                                                                      \
        struct iscsi_internal *priv = dev_to_iscsi_internal(dev);       \
-       return sprintf(buf, format"\n", priv->iscsi_transport->name);   \
+       return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\
 }                                                                      \
 static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL);
 
@@ -183,7 +187,7 @@ static ssize_t
 show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
-       return sprintf(buf, "%llu\n", (unsigned long long) ep->id);
+       return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
 }
 static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
 
@@ -2880,6 +2884,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
        struct iscsi_cls_session *session;
        int err = 0, value = 0;
 
+       if (ev->u.set_param.len > PAGE_SIZE)
+               return -EINVAL;
+
        session = iscsi_session_lookup(ev->u.set_param.sid);
        conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
        if (!conn || !session)
@@ -3027,6 +3034,9 @@ iscsi_set_host_param(struct iscsi_transport *transport,
        if (!transport->set_host_param)
                return -ENOSYS;
 
+       if (ev->u.set_host_param.len > PAGE_SIZE)
+               return -EINVAL;
+
        shost = scsi_host_lookup(ev->u.set_host_param.host_no);
        if (!shost) {
                printk(KERN_ERR "set_host_param could not find host no %u\n",
@@ -3614,6 +3624,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
        int err = 0;
        u32 portid;
+       u32 pdu_len;
        struct iscsi_uevent *ev = nlmsg_data(nlh);
        struct iscsi_transport *transport = NULL;
        struct iscsi_internal *priv;
@@ -3621,6 +3632,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
        struct iscsi_cls_conn *conn;
        struct iscsi_endpoint *ep = NULL;
 
+       if (!netlink_capable(skb, CAP_SYS_ADMIN))
+               return -EPERM;
+
        if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
                *group = ISCSI_NL_GRP_UIP;
        else
@@ -3753,6 +3767,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
                        err = -EINVAL;
                break;
        case ISCSI_UEVENT_SEND_PDU:
+               pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+
+               if ((ev->u.send_pdu.hdr_size > pdu_len) ||
+                   (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
+                       err = -EINVAL;
+                       break;
+               }
+
                conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
                if (conn) {
                        mutex_lock(&conn_mutex);
@@ -3957,7 +3979,7 @@ static ssize_t show_conn_state(struct device *dev,
            conn->state < ARRAY_SIZE(connection_state_names))
                state = connection_state_names[conn->state];
 
-       return sprintf(buf, "%s\n", state);
+       return sysfs_emit(buf, "%s\n", state);
 }
 static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state,
                        NULL);
@@ -4185,7 +4207,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%s\n", iscsi_session_state_name(session->state));
+       return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state));
 }
 static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state,
                        NULL);
@@ -4194,7 +4216,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%d\n", session->creator);
+       return sysfs_emit(buf, "%d\n", session->creator);
 }
 static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
                        NULL);
@@ -4203,7 +4225,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%d\n", session->target_id);
+       return sysfs_emit(buf, "%d\n", session->target_id);
 }
 static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
                        show_priv_session_target_id, NULL);
@@ -4216,8 +4238,8 @@ show_priv_session_##field(struct device *dev,                             \
        struct iscsi_cls_session *session =                             \
                        iscsi_dev_to_session(dev->parent);              \
        if (session->field == -1)                                       \
-               return sprintf(buf, "off\n");                           \
-       return sprintf(buf, format"\n", session->field);                \
+               return sysfs_emit(buf, "off\n");                        \
+       return sysfs_emit(buf, format"\n", session->field);             \
 }
 
 #define iscsi_priv_session_attr_store(field)                           \
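
Three independent hardenings land in this file: show_transport_handle() and the netlink receive path now require CAP_SYS_ADMIN, set_param and set_host_param lengths are capped at PAGE_SIZE, and SEND_PDU cross-checks the user-supplied header and data sizes against the real netlink payload before anything is copied. The payload check is the subtle one, so here it is in isolation (a sketch; struct req is illustrative, and the caller is assumed to have already verified that nlmsg_len covers the fixed header and *ev):

    #include <linux/errno.h>
    #include <linux/netlink.h>
    #include <linux/types.h>

    struct req { u32 hdr_size; u32 data_size; };  /* illustrative */

    static int validate_pdu(const struct nlmsghdr *nlh, const struct req *ev)
    {
            u32 pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);

            if (ev->hdr_size > pdu_len ||
                ev->data_size > pdu_len - ev->hdr_size)
                    return -EINVAL;  /* claimed sizes exceed what was sent */
            return 0;
    }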
index 76820d0..06bac8b 100644 (file)
@@ -48,8 +48,6 @@ struct sdw_intel {
 #endif
 };
 
-#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE      BIT(1)
-
 int intel_master_startup(struct platform_device *pdev);
 int intel_master_process_wakeen_event(struct platform_device *pdev);
 
index bc8520e..05b726c 100644 (file)
 #include "cadence_master.h"
 #include "intel.h"
 
-#define SDW_LINK_TYPE          4 /* from Intel ACPI documentation */
-#define SDW_MAX_LINKS          4
 #define SDW_SHIM_LCAP          0x0
 #define SDW_SHIM_BASE          0x2C000
 #define SDW_ALH_BASE           0x2C800
 #define SDW_LINK_BASE          0x30000
 #define SDW_LINK_SIZE          0x10000
 
-static int ctrl_link_mask;
-module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
-MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
-
-static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
-{
-       struct fwnode_handle *link;
-       char name[32];
-       u32 quirk_mask = 0;
-
-       /* Find master handle */
-       snprintf(name, sizeof(name),
-                "mipi-sdw-link-%d-subproperties", i);
-
-       link = fwnode_get_named_child_node(fw_node, name);
-       if (!link)
-               return false;
-
-       fwnode_property_read_u32(link,
-                                "intel-quirk-mask",
-                                &quirk_mask);
-
-       if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
-               return false;
-
-       return true;
-}
-
 static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
 {
        struct sdw_intel_link_res *link = ctx->links;
@@ -81,74 +51,6 @@ static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
        return 0;
 }
 
-static int
-sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
-{
-       struct acpi_device *adev;
-       int ret, i;
-       u8 count;
-
-       if (acpi_bus_get_device(info->handle, &adev))
-               return -EINVAL;
-
-       /* Found controller, find links supported */
-       count = 0;
-       ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
-                                           "mipi-sdw-master-count", &count, 1);
-
-       /*
-        * In theory we could check the number of links supported in
-        * hardware, but in that step we cannot assume SoundWire IP is
-        * powered.
-        *
-        * In addition, if the BIOS doesn't even provide this
-        * 'master-count' property then all the inits based on link
-        * masks will fail as well.
-        *
-        * We will check the hardware capabilities in the startup() step
-        */
-
-       if (ret) {
-               dev_err(&adev->dev,
-                       "Failed to read mipi-sdw-master-count: %d\n", ret);
-               return -EINVAL;
-       }
-
-       /* Check count is within bounds */
-       if (count > SDW_MAX_LINKS) {
-               dev_err(&adev->dev, "Link count %d exceeds max %d\n",
-                       count, SDW_MAX_LINKS);
-               return -EINVAL;
-       }
-
-       if (!count) {
-               dev_warn(&adev->dev, "No SoundWire links detected\n");
-               return -EINVAL;
-       }
-       dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
-
-       info->count = count;
-       info->link_mask = 0;
-
-       for (i = 0; i < count; i++) {
-               if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
-                       dev_dbg(&adev->dev,
-                               "Link %d masked, will not be enabled\n", i);
-                       continue;
-               }
-
-               if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
-                       dev_dbg(&adev->dev,
-                               "Link %d not selected in firmware\n", i);
-                       continue;
-               }
-
-               info->link_mask |= BIT(i);
-       }
-
-       return 0;
-}
-
 #define HDA_DSP_REG_ADSPIC2             (0x10)
 #define HDA_DSP_REG_ADSPIS2             (0x14)
 #define HDA_DSP_REG_ADSPIC2_SNDW        BIT(5)
@@ -357,66 +259,6 @@ sdw_intel_startup_controller(struct sdw_intel_ctx *ctx)
        return 0;
 }
 
-static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
-                                    void *cdata, void **return_value)
-{
-       struct sdw_intel_acpi_info *info = cdata;
-       struct acpi_device *adev;
-       acpi_status status;
-       u64 adr;
-
-       status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
-       if (ACPI_FAILURE(status))
-               return AE_OK; /* keep going */
-
-       if (acpi_bus_get_device(handle, &adev)) {
-               pr_err("%s: Couldn't find ACPI handle\n", __func__);
-               return AE_NOT_FOUND;
-       }
-
-       info->handle = handle;
-
-       /*
-        * On some Intel platforms, multiple children of the HDAS
-        * device can be found, but only one of them is the SoundWire
-        * controller. The SNDW device is always exposed with
-        * Name(_ADR, 0x40000000), with bits 31..28 representing the
-        * SoundWire link so filter accordingly
-        */
-       if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
-               return AE_OK; /* keep going */
-
-       /* device found, stop namespace walk */
-       return AE_CTRL_TERMINATE;
-}
-
-/**
- * sdw_intel_acpi_scan() - SoundWire Intel init routine
- * @parent_handle: ACPI parent handle
- * @info: description of what firmware/DSDT tables expose
- *
- * This scans the namespace and queries firmware to figure out which
- * links to enable. A follow-up use of sdw_intel_probe() and
- * sdw_intel_startup() is required for creation of devices and bus
- * startup
- */
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
-                       struct sdw_intel_acpi_info *info)
-{
-       acpi_status status;
-
-       info->handle = NULL;
-       status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
-                                    parent_handle, 1,
-                                    sdw_intel_acpi_cb,
-                                    NULL, info, NULL);
-       if (ACPI_FAILURE(status) || info->handle == NULL)
-               return -ENODEV;
-
-       return sdw_intel_scan_controller(info);
-}
-EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SOUNDWIRE_INTEL_INIT);
-
 /**
  * sdw_intel_probe() - SoundWire Intel probe routine
  * @res: resource data
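
The deleted scan helper filtered each candidate link twice, first against the module-level ctrl_link_mask and then against the firmware "enabled" property, before setting its bit in info->link_mask. Below is a minimal userspace sketch of that mask arithmetic; is_link_enabled() is a hypothetical stub standing in for the fwnode property read, not the driver's real helper.

#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1u << (n))

/* Hypothetical stub for the firmware "link enabled" property lookup. */
static bool is_link_enabled(int i)
{
	return i != 2;			/* pretend firmware disabled link 2 */
}

static unsigned int build_link_mask(int count, unsigned int ctrl_link_mask)
{
	unsigned int mask = 0;
	int i;

	for (i = 0; i < count; i++) {
		/* Skip links masked out by the module parameter... */
		if (ctrl_link_mask && !(ctrl_link_mask & BIT(i)))
			continue;
		/* ...and links the firmware did not enable. */
		if (!is_link_enabled(i))
			continue;
		mask |= BIT(i);
	}
	return mask;
}

int main(void)
{
	/* 4 links reported by ACPI, module parameter allows links 0..2. */
	printf("link_mask = 0x%x\n", build_link_mask(4, 0x7));	/* 0x3 */
	return 0;
}
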
index 5064be5..744b99d 100644
@@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
 
+       if (cache->swap_extents) {
+               ret = -ETXTBSY;
+               goto out;
+       }
+
        if (cache->ro) {
                cache->ro++;
                ret = 0;
@@ -2307,7 +2312,7 @@ again:
        }
 
        ret = inc_block_group_ro(cache, 0);
-       if (!do_chunk_alloc)
+       if (!do_chunk_alloc || ret == -ETXTBSY)
                goto unlock_out;
        if (!ret)
                goto out;
@@ -2316,6 +2321,8 @@ again:
        if (ret < 0)
                goto out;
        ret = inc_block_group_ro(cache, 0);
+       if (ret == -ETXTBSY)
+               goto unlock_out;
 out:
        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
                alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
@@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                ASSERT(list_empty(&block_group->io_list));
                ASSERT(list_empty(&block_group->bg_list));
                ASSERT(refcount_read(&block_group->refs) == 1);
+               ASSERT(block_group->swap_extents == 0);
                btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
@@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
                __btrfs_remove_free_space_cache(block_group->free_space_ctl);
        }
 }
+
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
+{
+       bool ret = true;
+
+       spin_lock(&bg->lock);
+       if (bg->ro)
+               ret = false;
+       else
+               bg->swap_extents++;
+       spin_unlock(&bg->lock);
+
+       return ret;
+}
+
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
+{
+       spin_lock(&bg->lock);
+       ASSERT(!bg->ro);
+       ASSERT(bg->swap_extents >= amount);
+       bg->swap_extents -= amount;
+       spin_unlock(&bg->lock);
+}
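
The new inc/dec helpers pair the swap_extents counter with the block group's existing spinlock so that pinning a group for a swapfile and flipping it read-only exclude each other: the increment fails once 'ro' is set, and (per the inc_block_group_ro() hunk above) setting RO fails with -ETXTBSY while the counter is nonzero. A rough userspace analogue of that guard, with a pthread mutex standing in for the spinlock:

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct block_group {
	pthread_mutex_t lock;
	bool ro;
	int swap_extents;
};

/* Pin the group for a swapfile; fails if it already went read-only. */
static bool inc_swap_extents(struct block_group *bg)
{
	bool ret = true;

	pthread_mutex_lock(&bg->lock);
	if (bg->ro)
		ret = false;
	else
		bg->swap_extents++;
	pthread_mutex_unlock(&bg->lock);
	return ret;
}

/* Flip to read-only; fails (-ETXTBSY in the kernel) while pinned. */
static bool set_ro(struct block_group *bg)
{
	bool ret = true;

	pthread_mutex_lock(&bg->lock);
	if (bg->swap_extents)
		ret = false;
	else
		bg->ro = true;
	pthread_mutex_unlock(&bg->lock);
	return ret;
}

int main(void)
{
	struct block_group bg = { PTHREAD_MUTEX_INITIALIZER, false, 0 };

	assert(inc_swap_extents(&bg));
	assert(!set_ro(&bg));		/* pinned: RO must fail */
	bg.swap_extents--;
	assert(set_ro(&bg));		/* unpinned: RO succeeds */
	assert(!inc_swap_extents(&bg));	/* RO: pinning must fail */
	puts("ok");
	return 0;
}

Whichever side takes the lock first wins; the loser backs off with an error instead of racing.
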
index 2967842..3ecc337 100644
@@ -186,6 +186,12 @@ struct btrfs_block_group {
        /* Flag indicating this block group is placed on a sequential zone */
        bool seq_zone;
 
+       /*
+        * Number of extents in this block group used for swap files.
+        * All accesses protected by the spinlock 'lock'.
+        */
+       int swap_extents;
+
        /* Record locked full stripes for RAID5/6 block group */
        struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
 
@@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
 void btrfs_freeze_block_group(struct btrfs_block_group *cache);
 void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
 
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
+
 #endif /* BTRFS_BLOCK_GROUP_H */
index 6d203ac..3f4c832 100644
@@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        const u32 csum_size = fs_info->csum_size;
+       const u32 sectorsize = fs_info->sectorsize;
        struct page *page;
        unsigned long i;
        char *kaddr;
@@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
        shash->tfm = fs_info->csum_shash;
 
        for (i = 0; i < cb->nr_pages; i++) {
+               u32 pg_offset;
+               u32 bytes_left = PAGE_SIZE;
                page = cb->compressed_pages[i];
 
-               kaddr = kmap_atomic(page);
-               crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
-               kunmap_atomic(kaddr);
-
-               if (memcmp(&csum, cb_sum, csum_size)) {
-                       btrfs_print_data_csum_error(inode, disk_start,
-                                       csum, cb_sum, cb->mirror_num);
-                       if (btrfs_io_bio(bio)->device)
-                               btrfs_dev_stat_inc_and_print(
-                                       btrfs_io_bio(bio)->device,
-                                       BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                       return -EIO;
+               /* Determine the remaining bytes inside the page first */
+               if (i == cb->nr_pages - 1)
+                       bytes_left = cb->compressed_len - i * PAGE_SIZE;
+
+               /* Hash through the page sector by sector */
+               for (pg_offset = 0; pg_offset < bytes_left;
+                    pg_offset += sectorsize) {
+                       kaddr = kmap_atomic(page);
+                       crypto_shash_digest(shash, kaddr + pg_offset,
+                                           sectorsize, csum);
+                       kunmap_atomic(kaddr);
+
+                       if (memcmp(&csum, cb_sum, csum_size) != 0) {
+                               btrfs_print_data_csum_error(inode, disk_start,
+                                               csum, cb_sum, cb->mirror_num);
+                               if (btrfs_io_bio(bio)->device)
+                                       btrfs_dev_stat_inc_and_print(
+                                               btrfs_io_bio(bio)->device,
+                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
+                               return -EIO;
+                       }
+                       cb_sum += csum_size;
+                       disk_start += sectorsize;
                }
-               cb_sum += csum_size;
        }
        return 0;
 }
@@ -640,7 +653,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio_first_page_all(bio)),
-                                  PAGE_SIZE);
+                                  fs_info->sectorsize);
        read_unlock(&em_tree->lock);
        if (!em)
                return BLK_STS_IOERR;
@@ -698,19 +711,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        refcount_set(&cb->pending_bios, 1);
 
        for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+               u32 pg_len = PAGE_SIZE;
                int submit = 0;
 
+               /*
+                * To handle the subpage case, we need to make sure the bio
+                * only covers the range we need.
+                *
+                * If we're at the last page, truncate the length to only cover
+                * the remaining part.
+                */
+               if (pg_index == nr_pages - 1)
+                       pg_len = min_t(u32, PAGE_SIZE,
+                                       compressed_len - pg_index * PAGE_SIZE);
+
                page = cb->compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
                page->index = em_start >> PAGE_SHIFT;
 
                if (comp_bio->bi_iter.bi_size)
-                       submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
+                       submit = btrfs_bio_fits_in_stripe(page, pg_len,
                                                          comp_bio, 0);
 
                page->mapping = NULL;
-               if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
-                   PAGE_SIZE) {
+               if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
                        unsigned int nr_sectors;
 
                        ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
@@ -743,9 +767,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        comp_bio->bi_private = cb;
                        comp_bio->bi_end_io = end_compressed_bio_read;
 
-                       bio_add_page(comp_bio, page, PAGE_SIZE, 0);
+                       bio_add_page(comp_bio, page, pg_len, 0);
                }
-               cur_disk_byte += PAGE_SIZE;
+               cur_disk_byte += pg_len;
        }
 
        ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
@@ -1237,7 +1261,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
        unsigned long prev_start_byte;
        unsigned long working_bytes = total_out - buf_start;
        unsigned long bytes;
-       char *kaddr;
        struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
 
        /*
@@ -1268,9 +1291,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
                                PAGE_SIZE - (buf_offset % PAGE_SIZE));
                bytes = min(bytes, working_bytes);
 
-               kaddr = kmap_atomic(bvec.bv_page);
-               memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
+                              bytes);
                flush_dcache_page(bvec.bv_page);
 
                buf_offset += bytes;
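
check_compressed_csum() above now hashes sector by sector instead of a page at a time, clamping the final page to what compressed_len actually covers. A small sketch of just that bounds arithmetic, assuming 4 KiB pages and a 512-byte sectorsize, with a toy additive digest standing in for crypto_shash_digest():

#include <stdio.h>

#define PAGE_SIZE  4096u
#define SECTORSIZE 512u		/* assumed fs_info->sectorsize */

/* Toy digest standing in for crypto_shash_digest(). */
static unsigned int toy_digest(const unsigned char *buf, unsigned int len)
{
	unsigned int sum = 0;

	while (len--)
		sum += *buf++;
	return sum;
}

static void checksum_pages(unsigned char *pages[], unsigned int nr_pages,
			   unsigned int compressed_len)
{
	unsigned int i, pg_offset;

	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes_left = PAGE_SIZE;

		/* The last page may be only partially filled. */
		if (i == nr_pages - 1)
			bytes_left = compressed_len - i * PAGE_SIZE;

		/* Hash through the page sector by sector. */
		for (pg_offset = 0; pg_offset < bytes_left;
		     pg_offset += SECTORSIZE)
			printf("page %u sector @%u csum %u\n", i, pg_offset,
			       toy_digest(pages[i] + pg_offset, SECTORSIZE));
	}
}

int main(void)
{
	static unsigned char p0[PAGE_SIZE], p1[PAGE_SIZE];
	unsigned char *pages[] = { p0, p1 };

	/* 5120 bytes: one full page plus two sectors on the last page. */
	checksum_pages(pages, 2, 5120);
	return 0;
}
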
index bd65935..9ae776a 100644
@@ -524,6 +524,11 @@ struct btrfs_swapfile_pin {
         * points to a struct btrfs_device.
         */
        bool is_block_group;
+       /*
+        * Only used when 'is_block_group' is true; it is the number of
+        * extents used by a swapfile for this block group (the 'ptr' field).
+        */
+       int bg_extent_count;
 };
 
 bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
index ec0b50b..bf25401 100644
@@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata(
         */
        if (!src_rsv || (!trans->bytes_reserved &&
                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
-               ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+               ret = btrfs_qgroup_reserve_meta(root, num_bytes,
+                                         BTRFS_QGROUP_RSV_META_PREALLOC, true);
                if (ret < 0)
                        return ret;
                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
@@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata(
                                                      btrfs_ino(inode),
                                                      num_bytes, 1);
                } else {
-                       btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
+                       btrfs_qgroup_free_meta_prealloc(root, num_bytes);
                }
                return ret;
        }
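
The second hunk of this change is about symmetry: the prealloc reservation was taken for num_bytes but released as nodesize, so whenever the two differ the qgroup accounting drifts. A toy counter makes the leak visible:

#include <stdio.h>

static long rsv;	/* outstanding reserved bytes */

static void reserve(long bytes) { rsv += bytes; }
static void release(long bytes) { rsv -= bytes; }

int main(void)
{
	const long nodesize  = 16384;
	const long num_bytes = 3 * nodesize;	/* e.g. three tree blocks */

	reserve(num_bytes);
	release(nodesize);	/* the old, mismatched free */
	printf("leaked: %ld bytes\n", rsv);	/* 32768, not 0 */

	rsv = 0;
	reserve(num_bytes);
	release(num_bytes);	/* the fix: symmetric amounts */
	printf("leaked: %ld bytes\n", rsv);	/* 0 */
	return 0;
}
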
index 4dfb3ea..4671c99 100644
@@ -3008,12 +3008,23 @@ readpage_ok:
                if (likely(uptodate)) {
                        loff_t i_size = i_size_read(inode);
                        pgoff_t end_index = i_size >> PAGE_SHIFT;
-                       unsigned off;
 
-                       /* Zero out the end if this page straddles i_size */
-                       off = offset_in_page(i_size);
-                       if (page->index == end_index && off)
-                               zero_user_segment(page, off, PAGE_SIZE);
+                       /*
+                        * Zero out the remaining part if this range straddles
+                        * i_size.
+                        *
+                        * Here we should only zero the range inside the bvec,
+                        * not touch anything else.
+                        *
+                        * NOTE: i_size is exclusive while end is inclusive.
+                        */
+                       if (page->index == end_index && i_size <= end) {
+                               u32 zero_start = max(offset_in_page(i_size),
+                                                    offset_in_page(end));
+
+                               zero_user_segment(page, zero_start,
+                                                 offset_in_page(end) + 1);
+                       }
                }
                ASSERT(bio_offset + len > bio_offset);
                bio_offset += len;
index bf2c51a..0e155f0 100644
@@ -3260,8 +3260,11 @@ reserve_space:
                        goto out;
                ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
                                                alloc_start, bytes_to_reserve);
-               if (ret)
+               if (ret) {
+                       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+                                            lockend, &cached_state);
                        goto out;
+               }
                ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
                                                alloc_end - alloc_start,
                                                i_blocksize(inode),
index 5400294..9988dec 100644
@@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
        to_unusable = size - to_free;
 
        ctl->free_space += to_free;
-       block_group->zone_unusable += to_unusable;
+       /*
+        * If the block group is read-only, we should account freed space into
+        * bytes_readonly.
+        */
+       if (!block_group->ro)
+               block_group->zone_unusable += to_unusable;
        spin_unlock(&ctl->tree_lock);
        if (!used) {
                spin_lock(&block_group->lock);
@@ -2801,8 +2806,10 @@ static void __btrfs_return_cluster_to_free_space(
        struct rb_node *node;
 
        spin_lock(&cluster->lock);
-       if (cluster->block_group != block_group)
-               goto out;
+       if (cluster->block_group != block_group) {
+               spin_unlock(&cluster->lock);
+               return;
+       }
 
        cluster->block_group = NULL;
        cluster->window_start = 0;
@@ -2840,8 +2847,6 @@ static void __btrfs_return_cluster_to_free_space(
                                   entry->offset, &entry->offset_index, bitmap);
        }
        cluster->root = RB_ROOT;
-
-out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
 }
@@ -3125,8 +3130,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
                        entry->bytes -= bytes;
                }
 
-               if (entry->bytes == 0)
-                       rb_erase(&entry->offset_index, &cluster->root);
                break;
        }
 out:
@@ -3143,7 +3146,10 @@ out:
        ctl->free_space -= bytes;
        if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
                ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+
+       spin_lock(&cluster->lock);
        if (entry->bytes == 0) {
+               rb_erase(&entry->offset_index, &cluster->root);
                ctl->free_extents--;
                if (entry->bitmap) {
                        kmem_cache_free(btrfs_free_space_bitmap_cachep,
@@ -3156,6 +3162,7 @@ out:
                kmem_cache_free(btrfs_free_space_cachep, entry);
        }
 
+       spin_unlock(&cluster->lock);
        spin_unlock(&ctl->tree_lock);
 
        return ret;
index 2e1c282..35bfa05 100644
@@ -1674,9 +1674,6 @@ next_slot:
                         */
                        btrfs_release_path(path);
 
-                       /* If extent is RO, we must COW it */
-                       if (btrfs_extent_readonly(fs_info, disk_bytenr))
-                               goto out_check;
                        ret = btrfs_cross_ref_exist(root, ino,
                                                    found_key.offset -
                                                    extent_offset, disk_bytenr, false);
@@ -1723,6 +1720,7 @@ next_slot:
                                WARN_ON_ONCE(freespace_inode);
                                goto out_check;
                        }
+                       /* If the extent's block group is RO, we must COW */
                        if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
                                goto out_check;
                        nocow = true;
@@ -6085,7 +6083,7 @@ static int btrfs_dirty_inode(struct inode *inode)
                return PTR_ERR(trans);
 
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-       if (ret && ret == -ENOSPC) {
+       if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
                /* whoops, lets try again with the full transaction */
                btrfs_end_transaction(trans);
                trans = btrfs_start_transaction(root, 1);
@@ -10200,6 +10198,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
        sp->ptr = ptr;
        sp->inode = inode;
        sp->is_block_group = is_block_group;
+       sp->bg_extent_count = 1;
 
        spin_lock(&fs_info->swapfile_pins_lock);
        p = &fs_info->swapfile_pins.rb_node;
@@ -10213,6 +10212,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
                           (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
                        p = &(*p)->rb_right;
                } else {
+                       if (is_block_group)
+                               entry->bg_extent_count++;
                        spin_unlock(&fs_info->swapfile_pins_lock);
                        kfree(sp);
                        return 1;
@@ -10238,8 +10239,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
                sp = rb_entry(node, struct btrfs_swapfile_pin, node);
                if (sp->inode == inode) {
                        rb_erase(&sp->node, &fs_info->swapfile_pins);
-                       if (sp->is_block_group)
+                       if (sp->is_block_group) {
+                               btrfs_dec_block_group_swap_extents(sp->ptr,
+                                                          sp->bg_extent_count);
                                btrfs_put_block_group(sp->ptr);
+                       }
                        kfree(sp);
                }
                node = next;
@@ -10300,7 +10304,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                               sector_t *span)
 {
        struct inode *inode = file_inode(file);
-       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_state *cached_state = NULL;
        struct extent_map *em = NULL;
@@ -10351,13 +10356,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
           "cannot activate swapfile while exclusive operation is running");
                return -EBUSY;
        }
+
+       /*
+        * Prevent snapshot creation while we are activating the swap file.
+        * We do not want to race with it: if snapshot creation has already
+        * started before we bumped nr_swapfiles from 0 to 1 and completes
+        * before the first write into the swap file after it is activated,
+        * then that write would fall back to COW.
+        */
+       if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+          "cannot activate swapfile because snapshot creation is in progress");
+               return -EINVAL;
+       }
        /*
         * Snapshots can create extents which require COW even if NODATACOW is
         * set. We use this counter to prevent snapshots. We must increment it
         * before walking the extents because we don't want a concurrent
         * snapshot to run after we've already checked the extents.
         */
-       atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+       atomic_inc(&root->nr_swapfiles);
 
        isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
@@ -10454,6 +10473,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                        goto out;
                }
 
+               if (!btrfs_inc_block_group_swap_extents(bg)) {
+                       btrfs_warn(fs_info,
+                          "block group for swapfile at %llu is read-only%s",
+                          bg->start,
+                          atomic_read(&fs_info->scrubs_running) ?
+                                      " (scrub running)" : "");
+                       btrfs_put_block_group(bg);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                ret = btrfs_add_swapfile_pin(inode, bg, true);
                if (ret) {
                        btrfs_put_block_group(bg);
@@ -10492,6 +10522,8 @@ out:
        if (ret)
                btrfs_swap_deactivate(file);
 
+       btrfs_drew_write_unlock(&root->snapshot_lock);
+
        btrfs_exclop_finish(fs_info);
 
        if (ret)
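
Swapfile activation above takes btrfs_drew_try_write_lock() rather than the blocking variant, so it fails fast with -EINVAL instead of waiting while a snapshot is underway. A sketch of the same fail-fast shape using a pthread rwlock as a loose stand-in for the btrfs drew lock (the two primitives are not equivalent; only the try-lock pattern is the point):

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_rwlock_t snapshot_lock = PTHREAD_RWLOCK_INITIALIZER;

/* Swapfile activation: refuse to block, fail fast instead. */
static int activate_swapfile(void)
{
	if (pthread_rwlock_trywrlock(&snapshot_lock) != 0)
		return -EINVAL;	/* snapshot creation in progress */

	/* ... walk the extents and pin the block groups ... */

	pthread_rwlock_unlock(&snapshot_lock);
	return 0;
}

int main(void)
{
	/* Simulate snapshot creation holding the other side of the lock. */
	pthread_rwlock_rdlock(&snapshot_lock);
	printf("during snapshot: %d\n", activate_swapfile());	/* -22 */
	pthread_rwlock_unlock(&snapshot_lock);

	printf("when idle: %d\n", activate_swapfile());		/* 0 */
	return 0;
}
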
index 072e777..e8d53fe 100644
@@ -1936,7 +1936,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
        if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
                readonly = true;
        if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
-               if (vol_args->size > PAGE_SIZE) {
+               u64 nums;
+
+               if (vol_args->size < sizeof(*inherit) ||
+                   vol_args->size > PAGE_SIZE) {
                        ret = -EINVAL;
                        goto free_args;
                }
@@ -1945,6 +1948,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
                        ret = PTR_ERR(inherit);
                        goto free_args;
                }
+
+               if (inherit->num_qgroups > PAGE_SIZE ||
+                   inherit->num_ref_copies > PAGE_SIZE ||
+                   inherit->num_excl_copies > PAGE_SIZE) {
+                       ret = -EINVAL;
+                       goto free_inherit;
+               }
+
+               nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+                      2 * inherit->num_excl_copies;
+               if (vol_args->size != struct_size(inherit, qgroups, nums)) {
+                       ret = -EINVAL;
+                       goto free_inherit;
+               }
        }
 
        ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
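
The ioctl hardening above first bounds each count so the size arithmetic cannot overflow, then requires the user-supplied size to match the header plus its flexible qgroups[] array exactly. A userspace sketch of the same validation against a toy layout (not the real struct btrfs_qgroup_inherit):

#include <stdint.h>
#include <stdio.h>

/* Toy stand-in for struct btrfs_qgroup_inherit. */
struct inherit_hdr {
	uint64_t num_qgroups;
	uint64_t num_ref_copies;
	uint64_t num_excl_copies;
	uint64_t qgroups[];		/* flexible array of qgroup ids */
};

#define COUNT_LIMIT 4096u		/* per-count bound, like PAGE_SIZE */

static int validate(const struct inherit_hdr *h, size_t copied_size)
{
	uint64_t nums;

	/* Bound each count first so the size math below cannot overflow. */
	if (h->num_qgroups > COUNT_LIMIT ||
	    h->num_ref_copies > COUNT_LIMIT ||
	    h->num_excl_copies > COUNT_LIMIT)
		return -1;

	nums = h->num_qgroups + 2 * h->num_ref_copies +
	       2 * h->num_excl_copies;
	/* Equivalent of struct_size(h, qgroups, nums). */
	if (copied_size != sizeof(*h) + nums * sizeof(h->qgroups[0]))
		return -1;
	return 0;
}

int main(void)
{
	struct inherit_hdr h = { .num_qgroups = 2 };

	printf("exact size: %d\n",
	       validate(&h, sizeof(h) + 2 * sizeof(uint64_t)));	/* 0 */
	printf("short size: %d\n", validate(&h, sizeof(h)));	/* -1 */
	return 0;
}

Requiring an exact size, not merely a minimum, stops a malicious caller from smuggling counts that imply reads past the copied buffer.
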
index aa9cd11..9084a95 100644
@@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
        destlen = min_t(unsigned long, destlen, PAGE_SIZE);
        bytes = min_t(unsigned long, destlen, out_len - start_byte);
 
-       kaddr = kmap_atomic(dest_page);
+       kaddr = kmap_local_page(dest_page);
        memcpy(kaddr, workspace->buf + start_byte, bytes);
 
        /*
@@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
         */
        if (bytes < destlen)
                memset(kaddr+bytes, 0, destlen-bytes);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 out:
        return ret;
 }
index 808370a..14ff388 100644
@@ -3841,8 +3841,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
        return num_bytes;
 }
 
-static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-                               enum btrfs_qgroup_rsv_type type, bool enforce)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             enum btrfs_qgroup_rsv_type type, bool enforce)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
@@ -3873,14 +3873,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 {
        int ret;
 
-       ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
+       ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
        if (ret <= 0 && ret != -EDQUOT)
                return ret;
 
        ret = try_flush_qgroup(root);
        if (ret < 0)
                return ret;
-       return qgroup_reserve_meta(root, num_bytes, type, enforce);
+       return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
 }
 
 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
index 50dea9a..7283e4f 100644
@@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
 int btrfs_qgroup_free_data(struct btrfs_inode *inode,
                           struct extent_changeset *reserved, u64 start,
                           u64 len);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             enum btrfs_qgroup_rsv_type type, bool enforce);
 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
                                enum btrfs_qgroup_rsv_type type, bool enforce);
 /* Reserve metadata space for pertrans and prealloc type */
index 8ec34ec..8c31357 100644
@@ -249,8 +249,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
 static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 {
        int i;
-       char *s;
-       char *d;
        int ret;
 
        ret = alloc_rbio_pages(rbio);
@@ -261,13 +259,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
                if (!rbio->bio_pages[i])
                        continue;
 
-               s = kmap(rbio->bio_pages[i]);
-               d = kmap(rbio->stripe_pages[i]);
-
-               copy_page(d, s);
-
-               kunmap(rbio->bio_pages[i]);
-               kunmap(rbio->stripe_pages[i]);
+               copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
                SetPageUptodate(rbio->stripe_pages[i]);
        }
        set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
@@ -2359,16 +2351,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
        SetPageUptodate(p_page);
 
        if (has_qstripe) {
+               /* RAID6, allocate and map temp space for the Q stripe */
                q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
                if (!q_page) {
                        __free_page(p_page);
                        goto cleanup;
                }
                SetPageUptodate(q_page);
+               pointers[rbio->real_stripes - 1] = kmap(q_page);
        }
 
        atomic_set(&rbio->error, 0);
 
+       /* Map the parity stripe just once */
+       pointers[nr_data] = kmap(p_page);
+
        for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
                struct page *p;
                void *parity;
@@ -2378,16 +2375,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                        pointers[stripe] = kmap(p);
                }
 
-               /* then add the parity stripe */
-               pointers[stripe++] = kmap(p_page);
-
                if (has_qstripe) {
-                       /*
-                        * raid6, add the qstripe and call the
-                        * library function to fill in our p/q
-                        */
-                       pointers[stripe++] = kmap(q_page);
-
+                       /* RAID6, call the library function to fill in our P/Q */
                        raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
                                                pointers);
                } else {
@@ -2408,12 +2397,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 
                for (stripe = 0; stripe < nr_data; stripe++)
                        kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
-               kunmap(p_page);
        }
 
+       kunmap(p_page);
        __free_page(p_page);
-       if (q_page)
+       if (q_page) {
+               kunmap(q_page);
                __free_page(q_page);
+       }
 
 writeback:
        /*
index 2b490be..8e026de 100644
@@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info,
        stack_trace_print(ra->trace, ra->trace_len, 2);
 }
 #else
-static void inline __save_stack_trace(struct ref_action *ra)
+static inline void __save_stack_trace(struct ref_action *ra)
 {
 }
 
-static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
+static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
                                       struct ref_action *ra)
 {
        btrfs_err(fs_info, "  ref-verify: no stacktrace support");
index b24396c..762881b 100644
@@ -106,12 +106,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
        set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
 
        if (comp_type == BTRFS_COMPRESS_NONE) {
-               char *map;
-
-               map = kmap(page);
-               memcpy(map, data_start, datal);
+               memcpy_to_page(page, 0, data_start, datal);
                flush_dcache_page(page);
-               kunmap(page);
        } else {
                ret = btrfs_decompress(comp_type, data_start, page, 0,
                                       inline_size, datal);
@@ -553,6 +549,24 @@ process_slot:
                 */
                btrfs_release_path(path);
 
+               /*
+                * When using NO_HOLES and we are cloning a range that covers
+                * only a hole (no extents) into a range beyond the current
+                * i_size, punching a hole in the target range will not create
+                * an extent map defining a hole, because the range starts at or
+                * beyond current i_size. If the file previously had an i_size
+                * greater than the new i_size set by this clone operation, we
+                * need to make sure the next fsync is a full fsync, so that it
+                * detects and logs a hole covering a range from the current
+                * i_size to the new i_size. If the clone range covers extents,
+                * besides a hole, then we know the full sync flag was already
+                * set by previous calls to btrfs_replace_file_extents() that
+                * replaced file extent items.
+                */
+               if (last_dest_end >= i_size_read(inode))
+                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                               &BTRFS_I(inode)->runtime_flags);
+
                ret = btrfs_replace_file_extents(inode, path, last_dest_end,
                                destoff + len - 1, NULL, &trans);
                if (ret)
index 582df11..c2900eb 100644
@@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                         * commit_transactions.
                         */
                        ro_set = 0;
+               } else if (ret == -ETXTBSY) {
+                       btrfs_warn(fs_info,
+                  "skipping scrub of block group %llu due to active swapfile",
+                                  cache->start);
+                       scrub_pause_off(fs_info);
+                       ret = 0;
+                       goto skip_unfreeze;
                } else {
                        btrfs_warn(fs_info,
                                   "failed setting block group ro: %d", ret);
@@ -3862,7 +3869,7 @@ done:
                } else {
                        spin_unlock(&cache->lock);
                }
-
+skip_unfreeze:
                btrfs_unfreeze_block_group(cache);
                btrfs_put_block_group(cache);
                if (ret)
index f878782..8f32385 100644
@@ -4932,7 +4932,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct inode *inode;
        struct page *page;
-       char *addr;
        pgoff_t index = offset >> PAGE_SHIFT;
        pgoff_t last_index;
        unsigned pg_offset = offset_in_page(offset);
@@ -4985,10 +4984,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
                        }
                }
 
-               addr = kmap(page);
-               memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
-                      cur_len);
-               kunmap(page);
+               memcpy_from_page(sctx->send_buf + sctx->send_size, page,
+                                pg_offset, cur_len);
                unlock_page(page);
                put_page(page);
                index++;
index f843564..f7a4ad8 100644
@@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        btrfs_resize_thread_pool(fs_info,
                fs_info->thread_pool_size, old_thread_pool_size);
 
-       if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
-           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+       if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
+           (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
            (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
                btrfs_warn(fs_info,
                "remount supports changing free space tree only from ro to rw");
index 582061c..f4ade82 100644
@@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
                return -EUCLEAN;
        }
        for (; ptr < end; ptr += sizeof(*dref)) {
-               u64 root_objectid;
-               u64 owner;
                u64 offset;
-               u64 hash;
 
+               /*
+                * We cannot check the extent_data_ref hash due to possible
+                * overflow from the leaf caused by hash collisions.
+                */
                dref = (struct btrfs_extent_data_ref *)ptr;
-               root_objectid = btrfs_extent_data_ref_root(leaf, dref);
-               owner = btrfs_extent_data_ref_objectid(leaf, dref);
                offset = btrfs_extent_data_ref_offset(leaf, dref);
-               hash = hash_extent_data_ref(root_objectid, owner, offset);
-               if (unlikely(hash != key->offset)) {
-                       extent_err(leaf, slot,
-       "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
-                                  hash, key->offset);
-                       return -EUCLEAN;
-               }
                if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
                        extent_err(leaf, slot,
        "invalid extent data backref offset, have %llu expect aligned to %u",
index d90695c..2f1acc9 100644
@@ -3174,16 +3174,13 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
        root_log_ctx.log_transid = log_root_tree->log_transid;
 
        if (btrfs_is_zoned(fs_info)) {
-               mutex_lock(&fs_info->tree_root->log_mutex);
                if (!log_root_tree->node) {
                        ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
                        if (ret) {
-                               mutex_unlock(&fs_info->tree_log_mutex);
                                mutex_unlock(&log_root_tree->log_mutex);
                                goto out;
                        }
                }
-               mutex_unlock(&fs_info->tree_root->log_mutex);
        }
 
        /*
index b025102..8a45142 100644
@@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
+       const bool start_trans = (current->journal_info == NULL);
        int ret;
 
-       trans = btrfs_start_transaction(root, 2);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
+       if (start_trans) {
+               /*
+                * 1 unit for inserting/updating/deleting the xattr
+                * 1 unit for the inode item update
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+       } else {
+               /*
+                * This can happen when smack is enabled and a directory is being
+                * created. It happens through d_instantiate_new(), which calls
+                * smack_d_instantiate(), which in turn calls __vfs_setxattr() to
+                * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the
+                * inode. We have already reserved space for the xattr and inode
+                * update at btrfs_mkdir(), so just use the transaction handle.
+                * We don't join or start a transaction, as that will reset the
+                * block_rsv of the handle and trigger a warning for the start
+                * case.
+                */
+               ASSERT(strncmp(name, XATTR_SECURITY_PREFIX,
+                              XATTR_SECURITY_PREFIX_LEN) == 0);
+               trans = current->journal_info;
+       }
 
        ret = btrfs_setxattr(trans, inode, name, value, size, flags);
        if (ret)
@@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        BUG_ON(ret);
 out:
-       btrfs_end_transaction(trans);
+       if (start_trans)
+               btrfs_end_transaction(trans);
        return ret;
 }
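
The xattr path above keys off current->journal_info: when a handle is already running (the smack mkdir case) it is reused, and only a handle we started ourselves is ended. A generic sketch of that start-only-if-not-nested pattern, with thread-local state standing in for journal_info:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct handle { int units; };

/* Thread-local stand-in for current->journal_info. */
static _Thread_local struct handle *journal_info;

static struct handle *start_transaction(int units)
{
	static _Thread_local struct handle h;

	h.units = units;
	journal_info = &h;
	return &h;
}

static void end_transaction(struct handle *trans)
{
	(void)trans;
	journal_info = NULL;
}

static void setxattr_trans(void)
{
	const bool start_trans = (journal_info == NULL);
	struct handle *trans;

	/* Reuse a running handle; otherwise reserve 2 units ourselves. */
	trans = start_trans ? start_transaction(2) : journal_info;

	/* ... insert the xattr item and update the inode ... */
	printf("using %s handle\n", start_trans ? "a new" : "the caller's");

	if (start_trans)		/* only end what we started */
		end_transaction(trans);
}

int main(void)
{
	struct handle *outer;

	setxattr_trans();		/* no outer transaction */
	outer = start_transaction(4);	/* e.g. mkdir's reserved handle */
	setxattr_trans();		/* nested: reuses the outer handle */
	end_transaction(outer);
	return 0;
}
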
 
index 05615a1..d524acf 100644
@@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in,
                            PAGE_SIZE - (buf_offset % PAGE_SIZE));
                bytes = min(bytes, bytes_left);
 
-               kaddr = kmap_atomic(dest_page);
-               memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(dest_page, pg_offset,
+                              workspace->buf + buf_offset, bytes);
 
                pg_offset += bytes;
                bytes_left -= bytes;
index d0eb0c8..1f972b7 100644
@@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
        sector_t sector = 0;
        struct blk_zone *zones = NULL;
        unsigned int i, nreported = 0, nr_zones;
-       unsigned int zone_sectors;
+       sector_t zone_sectors;
        char *model, *emulated;
        int ret;
 
@@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
                               u64 *bytenr_ret)
 {
        struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
-       unsigned int zone_sectors;
+       sector_t zone_sectors;
        u32 sb_zone;
        int ret;
        u8 zone_sectors_shift;
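
zone_sectors becomes sector_t because zone sizes are sector counts that can exceed 32 bits; an unsigned int would silently truncate large values (the exact motivating configuration is not stated in the diff, so treat this as the general hazard). A minimal illustration of the truncation with a hypothetical oversized zone:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t sector_t;	/* 64-bit sector count, as on typical kernels */

int main(void)
{
	/* Hypothetical oversized zone: 3 TiB in 512-byte sectors. */
	sector_t zone_sectors = 3ULL << 31;
	unsigned int truncated = (unsigned int)zone_sectors;

	printf("sector_t value: %llu\n", (unsigned long long)zone_sectors);
	printf("u32 value     : %u (truncated)\n", truncated);
	return 0;
}
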
index 9a48716..8e9626d 100644
@@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
                bytes = min_t(unsigned long, destlen - pg_offset,
                                workspace->out_buf.size - buf_offset);
 
-               kaddr = kmap_atomic(dest_page);
-               memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
-                               bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(dest_page, pg_offset,
+                              workspace->out_buf.dst + buf_offset, bytes);
 
                pg_offset += bytes;
        }
index 44e2024..28868eb 100644
@@ -16,6 +16,7 @@
 #include <linux/rculist_nulls.h>
 #include <linux/cpu.h>
 #include <linux/tracehook.h>
+#include <linux/freezer.h>
 
 #include "../kernel/sched/sched.h"
 #include "io-wq.h"
@@ -52,9 +53,6 @@ struct io_worker {
        struct io_wq_work *cur_work;
        spinlock_t lock;
 
-       const struct cred *cur_creds;
-       const struct cred *saved_creds;
-
        struct completion ref_done;
 
        struct rcu_head rcu;
@@ -117,7 +115,10 @@ struct io_wq {
        struct io_wq_hash *hash;
 
        refcount_t refs;
-       struct completion done;
+       struct completion exited;
+
+       atomic_t worker_refs;
+       struct completion worker_done;
 
        struct hlist_node cpuhp_node;
 
@@ -126,6 +127,17 @@ struct io_wq {
 
 static enum cpuhp_state io_wq_online;
 
+struct io_cb_cancel_data {
+       work_cancel_fn *fn;
+       void *data;
+       int nr_running;
+       int nr_pending;
+       bool cancel_all;
+};
+
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+                                      struct io_cb_cancel_data *match);
+
 static bool io_worker_get(struct io_worker *worker)
 {
        return refcount_inc_not_zero(&worker->ref);
@@ -175,11 +187,6 @@ static void io_worker_exit(struct io_worker *worker)
        worker->flags = 0;
        preempt_enable();
 
-       if (worker->saved_creds) {
-               revert_creds(worker->saved_creds);
-               worker->cur_creds = worker->saved_creds = NULL;
-       }
-
        raw_spin_lock_irq(&wqe->lock);
        if (flags & IO_WORKER_F_FREE)
                hlist_nulls_del_rcu(&worker->nulls_node);
@@ -188,7 +195,9 @@ static void io_worker_exit(struct io_worker *worker)
        raw_spin_unlock_irq(&wqe->lock);
 
        kfree_rcu(worker, rcu);
-       io_wq_put(wqe->wq);
+       if (atomic_dec_and_test(&wqe->wq->worker_refs))
+               complete(&wqe->wq->worker_done);
+       do_exit(0);
 }
 
 static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -263,12 +272,6 @@ static void io_wqe_dec_running(struct io_worker *worker)
                io_wqe_wake_worker(wqe, acct);
 }
 
-static void io_worker_start(struct io_worker *worker)
-{
-       worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
-       io_wqe_inc_running(worker);
-}
-
 /*
  * Worker will start processing some work. Move it to the busy list, if
  * it's currently on the freelist
@@ -319,10 +322,6 @@ static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
                worker->flags |= IO_WORKER_F_FREE;
                hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
        }
-       if (worker->saved_creds) {
-               revert_creds(worker->saved_creds);
-               worker->cur_creds = worker->saved_creds = NULL;
-       }
 }
 
 static inline unsigned int io_get_work_hash(struct io_wq_work *work)
@@ -397,18 +396,6 @@ static void io_flush_signals(void)
        }
 }
 
-static void io_wq_switch_creds(struct io_worker *worker,
-                              struct io_wq_work *work)
-{
-       const struct cred *old_creds = override_creds(work->creds);
-
-       worker->cur_creds = work->creds;
-       if (worker->saved_creds)
-               put_cred(old_creds); /* creds set by previous switch */
-       else
-               worker->saved_creds = old_creds;
-}
-
 static void io_assign_current_work(struct io_worker *worker,
                                   struct io_wq_work *work)
 {
@@ -458,8 +445,6 @@ get_next:
                        unsigned int hash = io_get_work_hash(work);
 
                        next_hashed = wq_next_work(work);
-                       if (work->creds && worker->cur_creds != work->creds)
-                               io_wq_switch_creds(worker, work);
                        wq->do_work(work);
                        io_assign_current_work(worker, NULL);
 
@@ -495,8 +480,13 @@ static int io_wqe_worker(void *data)
        struct io_worker *worker = data;
        struct io_wqe *wqe = worker->wqe;
        struct io_wq *wq = wqe->wq;
+       char buf[TASK_COMM_LEN];
+
+       worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+       io_wqe_inc_running(worker);
 
-       io_worker_start(worker);
+       sprintf(buf, "iou-wrk-%d", wq->task_pid);
+       set_task_comm(current, buf);
 
        while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
                set_current_state(TASK_INTERRUPTIBLE);
@@ -571,67 +561,11 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
        raw_spin_unlock_irq(&worker->wqe->lock);
 }
 
-static int task_thread(void *data, int index)
-{
-       struct io_worker *worker = data;
-       struct io_wqe *wqe = worker->wqe;
-       struct io_wqe_acct *acct = &wqe->acct[index];
-       struct io_wq *wq = wqe->wq;
-       char buf[TASK_COMM_LEN];
-
-       sprintf(buf, "iou-wrk-%d", wq->task_pid);
-       set_task_comm(current, buf);
-
-       current->pf_io_worker = worker;
-       worker->task = current;
-
-       set_cpus_allowed_ptr(current, cpumask_of_node(wqe->node));
-       current->flags |= PF_NO_SETAFFINITY;
-
-       raw_spin_lock_irq(&wqe->lock);
-       hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
-       list_add_tail_rcu(&worker->all_list, &wqe->all_list);
-       worker->flags |= IO_WORKER_F_FREE;
-       if (index == IO_WQ_ACCT_BOUND)
-               worker->flags |= IO_WORKER_F_BOUND;
-       if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
-               worker->flags |= IO_WORKER_F_FIXED;
-       acct->nr_workers++;
-       raw_spin_unlock_irq(&wqe->lock);
-
-       io_wqe_worker(data);
-       do_exit(0);
-}
-
-static int task_thread_bound(void *data)
-{
-       return task_thread(data, IO_WQ_ACCT_BOUND);
-}
-
-static int task_thread_unbound(void *data)
-{
-       return task_thread(data, IO_WQ_ACCT_UNBOUND);
-}
-
-pid_t io_wq_fork_thread(int (*fn)(void *), void *arg)
-{
-       unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
-                               CLONE_IO|SIGCHLD;
-       struct kernel_clone_args args = {
-               .flags          = ((lower_32_bits(flags) | CLONE_VM |
-                                   CLONE_UNTRACED) & ~CSIGNAL),
-               .exit_signal    = (lower_32_bits(flags) & CSIGNAL),
-               .stack          = (unsigned long)fn,
-               .stack_size     = (unsigned long)arg,
-       };
-
-       return kernel_clone(&args);
-}
-
 static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 {
+       struct io_wqe_acct *acct = &wqe->acct[index];
        struct io_worker *worker;
-       pid_t pid;
+       struct task_struct *tsk;
 
        __set_current_state(TASK_RUNNING);
 
@@ -645,17 +579,32 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
        spin_lock_init(&worker->lock);
        init_completion(&worker->ref_done);
 
-       refcount_inc(&wq->refs);
+       atomic_inc(&wq->worker_refs);
 
-       if (index == IO_WQ_ACCT_BOUND)
-               pid = io_wq_fork_thread(task_thread_bound, worker);
-       else
-               pid = io_wq_fork_thread(task_thread_unbound, worker);
-       if (pid < 0) {
-               io_wq_put(wq);
+       tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+       if (IS_ERR(tsk)) {
+               if (atomic_dec_and_test(&wq->worker_refs))
+                       complete(&wq->worker_done);
                kfree(worker);
                return false;
        }
+
+       tsk->pf_io_worker = worker;
+       worker->task = tsk;
+       set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+       tsk->flags |= PF_NOFREEZE | PF_NO_SETAFFINITY;
+
+       raw_spin_lock_irq(&wqe->lock);
+       hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
+       list_add_tail_rcu(&worker->all_list, &wqe->all_list);
+       worker->flags |= IO_WORKER_F_FREE;
+       if (index == IO_WQ_ACCT_BOUND)
+               worker->flags |= IO_WORKER_F_BOUND;
+       if (!acct->nr_workers && (worker->flags & IO_WORKER_F_BOUND))
+               worker->flags |= IO_WORKER_F_FIXED;
+       acct->nr_workers++;
+       raw_spin_unlock_irq(&wqe->lock);
+       wake_up_new_task(tsk);
        return true;
 }
 
@@ -664,6 +613,8 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 {
        struct io_wqe_acct *acct = &wqe->acct[index];
 
+       if (acct->nr_workers && test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state))
+               return false;
        /* if we have available workers or no work, no need */
        if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
                return false;
@@ -697,6 +648,7 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
+       set_notify_signal(worker->task);
        wake_up_process(worker->task);
        return false;
 }
@@ -725,6 +677,23 @@ static void io_wq_check_workers(struct io_wq *wq)
        }
 }
 
+static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+{
+       return true;
+}
+
+static void io_wq_cancel_pending(struct io_wq *wq)
+{
+       struct io_cb_cancel_data match = {
+               .fn             = io_wq_work_match_all,
+               .cancel_all     = true,
+       };
+       int node;
+
+       for_each_node(node)
+               io_wqe_cancel_pending_work(wq->wqes[node], &match);
+}
+
 /*
  * Manager thread. Tasked with creating new workers, if we need them.
  */
@@ -732,25 +701,38 @@ static int io_wq_manager(void *data)
 {
        struct io_wq *wq = data;
        char buf[TASK_COMM_LEN];
+       int node;
 
        sprintf(buf, "iou-mgr-%d", wq->task_pid);
        set_task_comm(current, buf);
-       current->flags |= PF_IO_WORKER;
-       wq->manager = current;
-
-       complete(&wq->done);
 
        do {
                set_current_state(TASK_INTERRUPTIBLE);
                io_wq_check_workers(wq);
                schedule_timeout(HZ);
+               try_to_freeze();
                if (fatal_signal_pending(current))
                        set_bit(IO_WQ_BIT_EXIT, &wq->state);
        } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
        io_wq_check_workers(wq);
-       wq->manager = NULL;
-       io_wq_put(wq);
+
+       rcu_read_lock();
+       for_each_node(node)
+               io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
+       rcu_read_unlock();
+
+       /* we might not ever have created any workers */
+       if (atomic_read(&wq->worker_refs))
+               wait_for_completion(&wq->worker_done);
+
+       spin_lock_irq(&wq->hash->wait.lock);
+       for_each_node(node)
+               list_del_init(&wq->wqes[node]->wait.entry);
+       spin_unlock_irq(&wq->hash->wait.lock);
+
+       io_wq_cancel_pending(wq);
+       complete(&wq->exited);
        do_exit(0);
 }
 
@@ -787,23 +769,20 @@ append:
 
 static int io_wq_fork_manager(struct io_wq *wq)
 {
-       int ret;
+       struct task_struct *tsk;
 
        if (wq->manager)
                return 0;
 
-       clear_bit(IO_WQ_BIT_EXIT, &wq->state);
-       refcount_inc(&wq->refs);
-       current->flags |= PF_IO_WORKER;
-       ret = io_wq_fork_thread(io_wq_manager, wq);
-       current->flags &= ~PF_IO_WORKER;
-       if (ret >= 0) {
-               wait_for_completion(&wq->done);
+       reinit_completion(&wq->worker_done);
+       tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
+       if (!IS_ERR(tsk)) {
+               wq->manager = get_task_struct(tsk);
+               wake_up_new_task(tsk);
                return 0;
        }
 
-       io_wq_put(wq);
-       return ret;
+       return PTR_ERR(tsk);
 }
 
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
@@ -813,7 +792,8 @@ static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
        unsigned long flags;
 
        /* Can only happen if manager creation fails after exec */
-       if (unlikely(io_wq_fork_manager(wqe->wq))) {
+       if (io_wq_fork_manager(wqe->wq) ||
+           test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
                work->flags |= IO_WQ_WORK_CANCEL;
                wqe->wq->do_work(work);
                return;
@@ -849,14 +829,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
        work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
 }
 
-struct io_cb_cancel_data {
-       work_cancel_fn *fn;
-       void *data;
-       int nr_running;
-       int nr_pending;
-       bool cancel_all;
-};
-
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
        struct io_cb_cancel_data *match = data;
@@ -1043,16 +1015,18 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        }
 
        wq->task_pid = current->pid;
-       init_completion(&wq->done);
+       init_completion(&wq->exited);
        refcount_set(&wq->refs, 1);
 
+       init_completion(&wq->worker_done);
+       atomic_set(&wq->worker_refs, 0);
+
        ret = io_wq_fork_manager(wq);
        if (!ret)
                return wq;
 
-       io_wq_put(wq);
-       io_wq_put_hash(data->hash);
 err:
+       io_wq_put_hash(data->hash);
        cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
        for_each_node(node)
                kfree(wq->wqes[node]);
@@ -1063,6 +1037,16 @@ err_wq:
        return ERR_PTR(ret);
 }
 
+static void io_wq_destroy_manager(struct io_wq *wq)
+{
+       if (wq->manager) {
+               wake_up_process(wq->manager);
+               wait_for_completion(&wq->exited);
+               put_task_struct(wq->manager);
+               wq->manager = NULL;
+       }
+}
+
 static void io_wq_destroy(struct io_wq *wq)
 {
        int node;
@@ -1070,26 +1054,16 @@ static void io_wq_destroy(struct io_wq *wq)
        cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 
        set_bit(IO_WQ_BIT_EXIT, &wq->state);
-       if (wq->manager)
-               wake_up_process(wq->manager);
-
-       rcu_read_lock();
-       for_each_node(node)
-               io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-       rcu_read_unlock();
+       io_wq_destroy_manager(wq);
 
-       spin_lock_irq(&wq->hash->wait.lock);
        for_each_node(node) {
                struct io_wqe *wqe = wq->wqes[node];
-
-               list_del_init(&wqe->wait.entry);
+               WARN_ON_ONCE(!wq_list_empty(&wqe->work_list));
                kfree(wqe);
        }
-       spin_unlock_irq(&wq->hash->wait.lock);
        io_wq_put_hash(wq->hash);
        kfree(wq->wqes);
        kfree(wq);
-
 }
 
 void io_wq_put(struct io_wq *wq)
@@ -1098,6 +1072,13 @@ void io_wq_put(struct io_wq *wq)
                io_wq_destroy(wq);
 }
 
+void io_wq_put_and_exit(struct io_wq *wq)
+{
+       set_bit(IO_WQ_BIT_EXIT, &wq->state);
+       io_wq_destroy_manager(wq);
+       io_wq_put(wq);
+}
+
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
 {
        struct task_struct *task = worker->task;
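
io-wq teardown above tracks live workers with a dedicated worker_refs counter plus a worker_done completion: each exiting worker drops a reference and the last one signals, while the manager waits only if any workers were ever created. A rough userspace analogue using C11 atomics and a POSIX semaphore in place of the kernel completion:

#include <pthread.h>
#include <semaphore.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int worker_refs;
static sem_t worker_done;	/* stands in for struct completion */

static void *worker(void *arg)
{
	(void)arg;
	/* ... pull work off the queue until told to exit ... */

	/* The last worker out signals the waiter, like complete(). */
	if (atomic_fetch_sub(&worker_refs, 1) == 1)
		sem_post(&worker_done);
	return NULL;
}

int main(void)
{
	pthread_t tids[3];
	int i;

	sem_init(&worker_done, 0, 0);
	for (i = 0; i < 3; i++) {
		/* Take the ref before creation, as create_io_worker() does. */
		atomic_fetch_add(&worker_refs, 1);
		pthread_create(&tids[i], NULL, worker, NULL);
	}

	/* Manager side: we might not ever have created any workers. */
	if (atomic_load(&worker_refs) > 0)
		sem_wait(&worker_done);
	for (i = 0; i < 3; i++)
		pthread_join(tids[i], NULL);
	puts("all workers done");
	return 0;
}

Taking the reference before thread creation (and dropping it on failure, as the hunk does) is what keeps the final wait from racing a worker that starts and exits immediately.
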
index b6ca12b..5fbf799 100644
@@ -79,8 +79,8 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
        struct io_wq_work_node list;
-       const struct cred *creds;
        unsigned flags;
+       unsigned short personality;
 };
 
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
@@ -114,12 +114,11 @@ struct io_wq_data {
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
 void io_wq_put(struct io_wq *wq);
+void io_wq_put_and_exit(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
 
-pid_t io_wq_fork_thread(int (*fn)(void *), void *arg);
-
 static inline bool io_wq_is_hashed(struct io_wq_work *work)
 {
        return work->flags & IO_WQ_WORK_HASHED;
index 4a08858..92c25b5 100644
 #include <linux/fsnotify.h>
 #include <linux/fadvise.h>
 #include <linux/eventpoll.h>
-#include <linux/fs_struct.h>
 #include <linux/splice.h>
 #include <linux/task_work.h>
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
+#include <linux/freezer.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
@@ -276,7 +274,7 @@ struct io_sq_data {
 
        unsigned long           state;
        struct completion       startup;
-       struct completion       completion;
+       struct completion       parked;
        struct completion       exited;
 };
 
@@ -338,7 +336,6 @@ struct io_ring_ctx {
                unsigned int            drain_next: 1;
                unsigned int            eventfd_async: 1;
                unsigned int            restricted: 1;
-               unsigned int            sqo_dead: 1;
                unsigned int            sqo_exec: 1;
 
                /*
@@ -380,11 +377,6 @@ struct io_ring_ctx {
 
        struct io_rings *rings;
 
-       /*
-        * For SQPOLL usage
-        */
-       struct task_struct      *sqo_task;
-
        /* Only used for accounting purposes */
        struct mm_struct        *mm_account;
 
@@ -688,7 +680,6 @@ enum {
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
-       REQ_F_WORK_INITIALIZED_BIT,
        REQ_F_LTIMEOUT_ACTIVE_BIT,
        REQ_F_COMPLETE_INLINE_BIT,
 
@@ -712,7 +703,7 @@ enum {
 
        /* fail rest of links */
        REQ_F_FAIL_LINK         = BIT(REQ_F_FAIL_LINK_BIT),
-       /* on inflight list */
+       /* on inflight list, should be cancelled and waited on reliably at exit */
        REQ_F_INFLIGHT          = BIT(REQ_F_INFLIGHT_BIT),
        /* read/write uses file position */
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
@@ -730,8 +721,6 @@ enum {
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
        /* doesn't need file table for this request */
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
-       /* io_wq_work is initialized */
-       REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
        /* linked timeout is active, i.e. prepared by link's head */
        REQ_F_LTIMEOUT_ACTIVE   = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
        /* completion is deferred through io_comp_state */
@@ -1080,9 +1069,7 @@ static bool io_match_task(struct io_kiocb *head,
                return true;
 
        io_for_each_link(req, head) {
-               if (!(req->flags & REQ_F_WORK_INITIALIZED))
-                       continue;
-               if (req->file && req->file->f_op == &io_uring_fops)
+               if (req->flags & REQ_F_INFLIGHT)
                        return true;
                if (req->task->files == files)
                        return true;
@@ -1096,24 +1083,6 @@ static inline void req_set_fail_links(struct io_kiocb *req)
                req->flags |= REQ_F_FAIL_LINK;
 }
 
-static inline void __io_req_init_async(struct io_kiocb *req)
-{
-       memset(&req->work, 0, sizeof(req->work));
-       req->flags |= REQ_F_WORK_INITIALIZED;
-}
-
-/*
- * Note: must call io_req_init_async() for the first time you
- * touch any members of io_wq_work.
- */
-static inline void io_req_init_async(struct io_kiocb *req)
-{
-       if (req->flags & REQ_F_WORK_INITIALIZED)
-               return;
-
-       __io_req_init_async(req);
-}
-
 static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 {
        struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1196,37 +1165,11 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
        return false;
 }
 
-static void io_req_clean_work(struct io_kiocb *req)
-{
-       if (!(req->flags & REQ_F_WORK_INITIALIZED))
-               return;
-
-       if (req->work.creds) {
-               put_cred(req->work.creds);
-               req->work.creds = NULL;
-       }
-       if (req->flags & REQ_F_INFLIGHT) {
-               struct io_ring_ctx *ctx = req->ctx;
-               struct io_uring_task *tctx = req->task->io_uring;
-               unsigned long flags;
-
-               spin_lock_irqsave(&ctx->inflight_lock, flags);
-               list_del(&req->inflight_entry);
-               spin_unlock_irqrestore(&ctx->inflight_lock, flags);
-               req->flags &= ~REQ_F_INFLIGHT;
-               if (atomic_read(&tctx->in_idle))
-                       wake_up(&tctx->wait);
-       }
-
-       req->flags &= ~REQ_F_WORK_INITIALIZED;
-}
-
 static void io_req_track_inflight(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
        if (!(req->flags & REQ_F_INFLIGHT)) {
-               io_req_init_async(req);
                req->flags |= REQ_F_INFLIGHT;
 
                spin_lock_irq(&ctx->inflight_lock);
@@ -1240,8 +1183,6 @@ static void io_prep_async_work(struct io_kiocb *req)
        const struct io_op_def *def = &io_op_defs[req->opcode];
        struct io_ring_ctx *ctx = req->ctx;
 
-       io_req_init_async(req);
-
        if (req->flags & REQ_F_FORCE_ASYNC)
                req->work.flags |= IO_WQ_WORK_CONCURRENT;
 
@@ -1252,8 +1193,6 @@ static void io_prep_async_work(struct io_kiocb *req)
                if (def->unbound_nonreg_file)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
-       if (!req->work.creds)
-               req->work.creds = get_current_cred();
 }
 
 static void io_prep_async_link(struct io_kiocb *req)
@@ -1264,7 +1203,7 @@ static void io_prep_async_link(struct io_kiocb *req)
                io_prep_async_work(cur);
 }
 
-static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *link = io_prep_linked_timeout(req);
@@ -1275,18 +1214,9 @@ static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
 
        trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
                                        &req->work, req->flags);
-       io_wq_enqueue(tctx->io_wq, &req->work);
-       return link;
-}
-
-static void io_queue_async_work(struct io_kiocb *req)
-{
-       struct io_kiocb *link;
-
        /* init ->work of the whole link before punting */
        io_prep_async_link(req);
-       link = __io_queue_async_work(req);
-
+       io_wq_enqueue(tctx->io_wq, &req->work);
        if (link)
                io_queue_linked_timeout(link);
 }
@@ -1521,18 +1451,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
        return all_flushed;
 }
 
-static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                                     struct task_struct *tsk,
                                     struct files_struct *files)
 {
+       bool ret = true;
+
        if (test_bit(0, &ctx->cq_check_overflow)) {
                /* iopoll syncs against uring_lock, not completion_lock */
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_lock(&ctx->uring_lock);
-               __io_cqring_overflow_flush(ctx, force, tsk, files);
+               ret = __io_cqring_overflow_flush(ctx, force, tsk, files);
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_unlock(&ctx->uring_lock);
        }
+
+       return ret;
 }
 
 static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
@@ -1714,9 +1648,19 @@ static void io_dismantle_req(struct io_kiocb *req)
                io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
        if (req->fixed_rsrc_refs)
                percpu_ref_put(req->fixed_rsrc_refs);
-       io_req_clean_work(req);
+
+       if (req->flags & REQ_F_INFLIGHT) {
+               struct io_ring_ctx *ctx = req->ctx;
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->inflight_lock, flags);
+               list_del(&req->inflight_entry);
+               spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+               req->flags &= ~REQ_F_INFLIGHT;
+       }
 }
 
+/* must be called shortly after putting a request */
 static inline void io_put_task(struct task_struct *task, int nr)
 {
        struct io_uring_task *tctx = task->io_uring;
@@ -1800,15 +1744,7 @@ static void io_fail_links(struct io_kiocb *req)
                trace_io_uring_fail_link(req, link);
                io_cqring_fill_event(link, -ECANCELED);
 
-               /*
-                * It's ok to free under spinlock as they're not linked anymore,
-                * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
-                * work.fs->lock.
-                */
-               if (link->flags & REQ_F_WORK_INITIALIZED)
-                       io_put_req_deferred(link, 2);
-               else
-                       io_double_put_req(link);
+               io_put_req_deferred(link, 2);
                link = nxt;
        }
        io_commit_cqring(ctx);
@@ -1845,6 +1781,18 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
        return __io_req_find_next(req);
 }
 
+static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+{
+       if (!ctx)
+               return;
+       if (ctx->submit_state.comp.nr) {
+               mutex_lock(&ctx->uring_lock);
+               io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+               mutex_unlock(&ctx->uring_lock);
+       }
+       percpu_ref_put(&ctx->refs);
+}
+
 static bool __tctx_task_work(struct io_uring_task *tctx)
 {
        struct io_ring_ctx *ctx = NULL;
@@ -1862,30 +1810,20 @@ static bool __tctx_task_work(struct io_uring_task *tctx)
        node = list.first;
        while (node) {
                struct io_wq_work_node *next = node->next;
-               struct io_ring_ctx *this_ctx;
                struct io_kiocb *req;
 
                req = container_of(node, struct io_kiocb, io_task_work.node);
-               this_ctx = req->ctx;
-               req->task_work.func(&req->task_work);
-               node = next;
-
-               if (!ctx) {
-                       ctx = this_ctx;
-               } else if (ctx != this_ctx) {
-                       mutex_lock(&ctx->uring_lock);
-                       io_submit_flush_completions(&ctx->submit_state.comp, ctx);
-                       mutex_unlock(&ctx->uring_lock);
-                       ctx = this_ctx;
+               if (req->ctx != ctx) {
+                       ctx_flush_and_put(ctx);
+                       ctx = req->ctx;
+                       percpu_ref_get(&ctx->refs);
                }
-       }
 
-       if (ctx && ctx->submit_state.comp.nr) {
-               mutex_lock(&ctx->uring_lock);
-               io_submit_flush_completions(&ctx->submit_state.comp, ctx);
-               mutex_unlock(&ctx->uring_lock);
+               req->task_work.func(&req->task_work);
+               node = next;
        }
 
+       ctx_flush_and_put(ctx);
        return list.first != NULL;
 }
 
@@ -1893,10 +1831,10 @@ static void tctx_task_work(struct callback_head *cb)
 {
        struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
 
+       clear_bit(0, &tctx->task_state);
+
        while (__tctx_task_work(tctx))
                cond_resched();
-
-       clear_bit(0, &tctx->task_state);
 }
 
 static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req,
@@ -2010,7 +1948,7 @@ static void __io_req_task_submit(struct io_kiocb *req)
 
        /* ctx stays valid until unlock, even if we drop all our ctx->refs */
        mutex_lock(&ctx->uring_lock);
-       if (!ctx->sqo_dead && !(current->flags & PF_EXITING) && !current->in_execve)
+       if (!(current->flags & PF_EXITING) && !current->in_execve)
                __io_queue_sqe(req);
        else
                __io_req_task_cancel(req, -EFAULT);
@@ -2472,23 +2410,32 @@ static bool io_resubmit_prep(struct io_kiocb *req)
                return false;
        return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 }
-#endif
 
-static bool io_rw_reissue(struct io_kiocb *req)
+static bool io_rw_should_reissue(struct io_kiocb *req)
 {
-#ifdef CONFIG_BLOCK
        umode_t mode = file_inode(req->file)->i_mode;
+       struct io_ring_ctx *ctx = req->ctx;
 
        if (!S_ISBLK(mode) && !S_ISREG(mode))
                return false;
-       if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
+       if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
+           !(ctx->flags & IORING_SETUP_IOPOLL)))
                return false;
        /*
         * If ref is dying, we might be running poll reap from the exit work.
         * Don't attempt to reissue from that path, just let it fail with
         * -EAGAIN.
         */
-       if (percpu_ref_is_dying(&req->ctx->refs))
+       if (percpu_ref_is_dying(&ctx->refs))
+               return false;
+       return true;
+}
+#endif
+
+static bool io_rw_reissue(struct io_kiocb *req)
+{
+#ifdef CONFIG_BLOCK
+       if (!io_rw_should_reissue(req))
                return false;
 
        lockdep_assert_held(&req->ctx->uring_lock);
@@ -2531,6 +2478,19 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+#ifdef CONFIG_BLOCK
+       /* Rewind iter, if we have one. iopoll path resubmits as usual */
+       if (res == -EAGAIN && io_rw_should_reissue(req)) {
+               struct io_async_rw *rw = req->async_data;
+
+               if (rw)
+                       iov_iter_revert(&rw->iter,
+                                       req->result - iov_iter_count(&rw->iter));
+               else if (!io_resubmit_prep(req))
+                       res = -EIO;
+       }
+#endif
+
        if (kiocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
 
@@ -3279,6 +3239,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
        ret = io_iter_do_read(req, iter);
 
        if (ret == -EIOCBQUEUED) {
+               if (req->async_data)
+                       iov_iter_revert(iter, io_size - iov_iter_count(iter));
                goto out_free;
        } else if (ret == -EAGAIN) {
                /* IOPOLL retry should happen for io-wq threads */
@@ -3324,6 +3286,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                if (ret == -EIOCBQUEUED)
                        return 0;
                /* we got some bytes, but not all. retry. */
+               kiocb->ki_flags &= ~IOCB_WAITQ;
        } while (ret > 0 && ret < io_size);
 done:
        kiocb_done(kiocb, ret, issue_flags);
@@ -3410,6 +3373,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
        /* no retry on NONBLOCK nor RWF_NOWAIT */
        if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT))
                goto done;
+       if (ret2 == -EIOCBQUEUED && req->async_data)
+               iov_iter_revert(iter, io_size - iov_iter_count(iter));
        if (!force_nonblock || ret2 != -EAGAIN) {
                /* IOPOLL retry should happen for io-wq threads */
                if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN)
@@ -3588,7 +3553,6 @@ static int __io_splice_prep(struct io_kiocb *req,
                 * Splice operation will be punted async, and here we need
                 * to modify io_wq_work.flags, so initialize io_wq_work first.
                 */
-               io_req_init_async(req);
                req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
 
@@ -3864,7 +3828,7 @@ err:
 
 static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
 {
-       return io_openat2(req, issue_flags & IO_URING_F_NONBLOCK);
+       return io_openat2(req, issue_flags);
 }
 
 static int io_remove_buffers_prep(struct io_kiocb *req,
@@ -5003,6 +4967,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                        pt->error = -EINVAL;
                        return;
                }
+               /* double add on the same waitqueue head, ignore */
+               if (poll->head == head)
+                       return;
                poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
                if (!poll) {
                        pt->error = -ENOMEM;
@@ -5538,6 +5505,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
        data->mode = io_translate_timeout_mode(flags);
        hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
+       io_req_track_inflight(req);
        return 0;
 }
 
@@ -5945,8 +5913,22 @@ static void __io_clean_op(struct io_kiocb *req)
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
+       const struct cred *creds = NULL;
        int ret;
 
+       if (req->work.personality) {
+               const struct cred *new_creds;
+
+               if (!(issue_flags & IO_URING_F_NONBLOCK))
+                       mutex_lock(&ctx->uring_lock);
+               new_creds = idr_find(&ctx->personality_idr, req->work.personality);
+               if (!(issue_flags & IO_URING_F_NONBLOCK))
+                       mutex_unlock(&ctx->uring_lock);
+               if (!new_creds)
+                       return -EINVAL;
+               creds = override_creds(new_creds);
+       }
+
        switch (req->opcode) {
        case IORING_OP_NOP:
                ret = io_nop(req, issue_flags);
@@ -6053,6 +6035,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
                break;
        }
 
+       if (creds)
+               revert_creds(creds);
+
        if (ret)
                return ret;
 
@@ -6216,18 +6201,10 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 static void __io_queue_sqe(struct io_kiocb *req)
 {
        struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
-       const struct cred *old_creds = NULL;
        int ret;
 
-       if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
-           req->work.creds != current_cred())
-               old_creds = override_creds(req->work.creds);
-
        ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
 
-       if (old_creds)
-               revert_creds(old_creds);
-
        /*
         * We async punt it if the file wasn't marked NOWAIT, or if the file
         * doesn't support non-blocking read/write attempts
@@ -6314,7 +6291,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 {
        struct io_submit_state *state;
        unsigned int sqe_flags;
-       int id, ret = 0;
+       int ret = 0;
 
        req->opcode = READ_ONCE(sqe->opcode);
        /* same numerical values with corresponding REQ_F_*, safe to copy */
@@ -6346,15 +6323,9 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
            !io_op_defs[req->opcode].buffer_select)
                return -EOPNOTSUPP;
 
-       id = READ_ONCE(sqe->personality);
-       if (id) {
-               __io_req_init_async(req);
-               req->work.creds = idr_find(&ctx->personality_idr, id);
-               if (unlikely(!req->work.creds))
-                       return -EINVAL;
-               get_cred(req->work.creds);
-       }
-
+       req->work.list.next = NULL;
+       req->work.flags = 0;
+       req->work.personality = READ_ONCE(sqe->personality);
        state = &ctx->submit_state;
 
        /*
@@ -6616,8 +6587,7 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                if (!list_empty(&ctx->iopoll_list))
                        io_do_iopoll(ctx, &nr_events, 0);
 
-               if (to_submit && !ctx->sqo_dead &&
-                   likely(!percpu_ref_is_dying(&ctx->refs)))
+               if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)))
                        ret = io_submit_sqes(ctx, to_submit);
                mutex_unlock(&ctx->uring_lock);
        }
@@ -6686,7 +6656,7 @@ static void io_sq_thread_parkme(struct io_sq_data *sqd)
                 * wait_task_inactive().
                 */
                preempt_disable();
-               complete(&sqd->completion);
+               complete(&sqd->parked);
                schedule_preempt_disabled();
                preempt_enable();
        }
@@ -6703,7 +6673,6 @@ static int io_sq_thread(void *data)
 
        sprintf(buf, "iou-sqp-%d", sqd->task_pid);
        set_task_comm(current, buf);
-       sqd->thread = current;
        current->pf_io_worker = NULL;
 
        if (sqd->sq_cpu != -1)
@@ -6712,8 +6681,6 @@ static int io_sq_thread(void *data)
                set_cpus_allowed_ptr(current, cpu_online_mask);
        current->flags |= PF_NO_SETAFFINITY;
 
-       complete(&sqd->completion);
-
        wait_for_completion(&sqd->startup);
 
        while (!io_sq_thread_should_stop(sqd)) {
@@ -6770,6 +6737,7 @@ static int io_sq_thread(void *data)
                                io_ring_set_wakeup_flag(ctx);
 
                        schedule();
+                       try_to_freeze();
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                io_ring_clear_wakeup_flag(ctx);
                }
@@ -6784,18 +6752,25 @@ static int io_sq_thread(void *data)
        io_run_task_work();
 
        /*
-        * Clear thread under lock so that concurrent parks work correctly
+        * Ensure that we park properly if racing with someone trying to park
+        * while we're exiting. If we fail to grab the lock, check for a park
+        * request and park if necessary. The ordering of the park bit and the
+        * lock ensures that we catch this reliably.
         */
-       complete_all(&sqd->completion);
-       mutex_lock(&sqd->lock);
+       if (!mutex_trylock(&sqd->lock)) {
+               if (io_sq_thread_should_park(sqd))
+                       io_sq_thread_parkme(sqd);
+               mutex_lock(&sqd->lock);
+       }
+
        sqd->thread = NULL;
        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
                ctx->sqo_exec = 1;
                io_ring_set_wakeup_flag(ctx);
        }
-       mutex_unlock(&sqd->lock);
 
        complete(&sqd->exited);
+       mutex_unlock(&sqd->lock);
        do_exit(0);
 }
 
@@ -6917,11 +6892,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
        trace_io_uring_cqring_wait(ctx, min_events);
        do {
-               io_cqring_overflow_flush(ctx, false, NULL, NULL);
+               /* if we can't even flush overflow, don't wait for more */
+               if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) {
+                       ret = -EBUSY;
+                       break;
+               }
                prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
                                                TASK_INTERRUPTIBLE);
                ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
                finish_wait(&ctx->wait, &iowq.wq);
+               cond_resched();
        } while (ret > 0);
 
        restore_saved_sigmask_unless(ret == -EINTR);
@@ -7091,40 +7071,42 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 static void io_sq_thread_unpark(struct io_sq_data *sqd)
        __releases(&sqd->lock)
 {
-       if (!sqd->thread)
-               return;
        if (sqd->thread == current)
                return;
        clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
-       wake_up_state(sqd->thread, TASK_PARKED);
+       if (sqd->thread)
+               wake_up_state(sqd->thread, TASK_PARKED);
        mutex_unlock(&sqd->lock);
 }
 
-static bool io_sq_thread_park(struct io_sq_data *sqd)
+static void io_sq_thread_park(struct io_sq_data *sqd)
        __acquires(&sqd->lock)
 {
        if (sqd->thread == current)
-               return true;
+               return;
+       set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
        mutex_lock(&sqd->lock);
-       if (!sqd->thread) {
-               mutex_unlock(&sqd->lock);
-               return false;
+       if (sqd->thread) {
+               wake_up_process(sqd->thread);
+               wait_for_completion(&sqd->parked);
        }
-       set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
-       wake_up_process(sqd->thread);
-       wait_for_completion(&sqd->completion);
-       return true;
 }
 
 static void io_sq_thread_stop(struct io_sq_data *sqd)
 {
-       if (!sqd->thread)
+       if (test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state))
                return;
-
-       set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
-       WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state));
-       wake_up_process(sqd->thread);
-       wait_for_completion(&sqd->exited);
+       mutex_lock(&sqd->lock);
+       if (sqd->thread) {
+               set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+               WARN_ON_ONCE(test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state));
+               wake_up_process(sqd->thread);
+               mutex_unlock(&sqd->lock);
+               wait_for_completion(&sqd->exited);
+               WARN_ON_ONCE(sqd->thread);
+       } else {
+               mutex_unlock(&sqd->lock);
+       }
 }
 
 static void io_put_sq_data(struct io_sq_data *sqd)
@@ -7203,7 +7185,7 @@ static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
        mutex_init(&sqd->lock);
        init_waitqueue_head(&sqd->wait);
        init_completion(&sqd->startup);
-       init_completion(&sqd->completion);
+       init_completion(&sqd->parked);
        init_completion(&sqd->exited);
        return sqd;
 }
@@ -7834,6 +7816,8 @@ void __io_uring_free(struct task_struct *tsk)
        struct io_uring_task *tctx = tsk->io_uring;
 
        WARN_ON_ONCE(!xa_empty(&tctx->xa));
+       WARN_ON_ONCE(tctx->io_wq);
+
        percpu_counter_destroy(&tctx->inflight);
        kfree(tctx);
        tsk->io_uring = NULL;
@@ -7841,21 +7825,22 @@ void __io_uring_free(struct task_struct *tsk)
 
 static int io_sq_thread_fork(struct io_sq_data *sqd, struct io_ring_ctx *ctx)
 {
+       struct task_struct *tsk;
        int ret;
 
        clear_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
-       reinit_completion(&sqd->completion);
-       ctx->sqo_dead = ctx->sqo_exec = 0;
+       reinit_completion(&sqd->parked);
+       ctx->sqo_exec = 0;
        sqd->task_pid = current->pid;
-       current->flags |= PF_IO_WORKER;
-       ret = io_wq_fork_thread(io_sq_thread, sqd);
-       current->flags &= ~PF_IO_WORKER;
-       if (ret < 0) {
-               sqd->thread = NULL;
-               return ret;
-       }
-       wait_for_completion(&sqd->completion);
-       return io_uring_alloc_task_context(sqd->thread, ctx);
+       tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+       if (IS_ERR(tsk))
+               return PTR_ERR(tsk);
+       ret = io_uring_alloc_task_context(tsk, ctx);
+       if (ret)
+               set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+       sqd->thread = tsk;
+       wake_up_new_task(tsk);
+       return ret;
 }
 
 static int io_sq_offload_create(struct io_ring_ctx *ctx,
@@ -7878,6 +7863,7 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                fdput(f);
        }
        if (ctx->flags & IORING_SETUP_SQPOLL) {
+               struct task_struct *tsk;
                struct io_sq_data *sqd;
 
                ret = -EPERM;
@@ -7919,15 +7905,16 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                }
 
                sqd->task_pid = current->pid;
-               current->flags |= PF_IO_WORKER;
-               ret = io_wq_fork_thread(io_sq_thread, sqd);
-               current->flags &= ~PF_IO_WORKER;
-               if (ret < 0) {
-                       sqd->thread = NULL;
+               tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+               if (IS_ERR(tsk)) {
+                       ret = PTR_ERR(tsk);
                        goto err;
                }
-               wait_for_completion(&sqd->completion);
-               ret = io_uring_alloc_task_context(sqd->thread, ctx);
+               ret = io_uring_alloc_task_context(tsk, ctx);
+               if (ret)
+                       set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+               sqd->thread = tsk;
+               wake_up_new_task(tsk);
                if (ret)
                        goto err;
        } else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -7946,6 +7933,7 @@ static void io_sq_offload_start(struct io_ring_ctx *ctx)
 {
        struct io_sq_data *sqd = ctx->sq_data;
 
+       ctx->flags &= ~IORING_SETUP_R_DISABLED;
        if (ctx->flags & IORING_SETUP_SQPOLL)
                complete(&sqd->startup);
 }
@@ -8384,7 +8372,7 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
        }
 }
 
-static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
+static void io_req_caches_free(struct io_ring_ctx *ctx)
 {
        struct io_submit_state *submit_state = &ctx->submit_state;
        struct io_comp_state *cs = &ctx->submit_state.comp;
@@ -8444,7 +8432,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 
        percpu_ref_exit(&ctx->refs);
        free_uid(ctx->user);
-       io_req_caches_free(ctx, NULL);
+       io_req_caches_free(ctx);
        if (ctx->hash_map)
                io_wq_put_hash(ctx->hash_map);
        kfree(ctx->cancel_hash);
@@ -8512,16 +8500,13 @@ static int io_remove_personalities(int id, void *p, void *data)
        return 0;
 }
 
-static void io_run_ctx_fallback(struct io_ring_ctx *ctx)
+static bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
 {
-       struct callback_head *work, *head, *next;
+       struct callback_head *work, *next;
+       bool executed = false;
 
        do {
-               do {
-                       head = NULL;
-                       work = READ_ONCE(ctx->exit_task_work);
-               } while (cmpxchg(&ctx->exit_task_work, work, head) != work);
-
+               work = xchg(&ctx->exit_task_work, NULL);
                if (!work)
                        break;
 
@@ -8531,7 +8516,10 @@ static void io_run_ctx_fallback(struct io_ring_ctx *ctx)
                        work = next;
                        cond_resched();
                } while (work);
+               executed = true;
        } while (1);
+
+       return executed;
 }
 
 static void io_ring_exit_work(struct work_struct *work)
@@ -8547,7 +8535,6 @@ static void io_ring_exit_work(struct work_struct *work)
         */
        do {
                io_uring_try_cancel_requests(ctx, NULL, NULL);
-               io_run_ctx_fallback(ctx);
        } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
        io_ring_ctx_free(ctx);
 }
@@ -8556,10 +8543,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
        mutex_lock(&ctx->uring_lock);
        percpu_ref_kill(&ctx->refs);
-
-       if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
-               ctx->sqo_dead = 1;
-
        /* if force is set, the ring is going away. always drop after that */
        ctx->cq_overflow_flushed = 1;
        if (ctx->rings)
@@ -8648,7 +8631,8 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         struct files_struct *files)
 {
        struct io_task_cancel cancel = { .task = task, .files = files, };
-       struct io_uring_task *tctx = current->io_uring;
+       struct task_struct *tctx_task = task ?: current;
+       struct io_uring_task *tctx = tctx_task->io_uring;
 
        while (1) {
                enum io_wq_cancel cret;
@@ -8671,6 +8655,7 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                ret |= io_poll_remove_all(ctx, task, files);
                ret |= io_kill_timeouts(ctx, task, files);
                ret |= io_run_task_work();
+               ret |= io_run_ctx_fallback(ctx);
                io_cqring_overflow_flush(ctx, true, task, files);
                if (!ret)
                        break;
@@ -8718,17 +8703,6 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
        }
 }
 
-static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
-{
-       mutex_lock(&ctx->uring_lock);
-       ctx->sqo_dead = 1;
-       mutex_unlock(&ctx->uring_lock);
-
-       /* make sure callers enter the ring to get error */
-       if (ctx->rings)
-               io_ring_set_wakeup_flag(ctx);
-}
-
 /*
  * We need to iteratively cancel requests, in case a request has dependent
  * hard links. These persist even for failure of cancelations, hence keep
@@ -8738,15 +8712,17 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
                                          struct files_struct *files)
 {
        struct task_struct *task = current;
-       bool did_park = false;
 
        if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-               io_disable_sqo_submit(ctx);
-               did_park = io_sq_thread_park(ctx->sq_data);
-               if (did_park) {
-                       task = ctx->sq_data->thread;
-                       atomic_inc(&task->io_uring->in_idle);
+               /* never started, nothing to cancel */
+               if (ctx->flags & IORING_SETUP_R_DISABLED) {
+                       io_sq_offload_start(ctx);
+                       return;
                }
+               io_sq_thread_park(ctx->sq_data);
+               task = ctx->sq_data->thread;
+               if (task)
+                       atomic_inc(&task->io_uring->in_idle);
        }
 
        io_cancel_defer_files(ctx, task, files);
@@ -8755,10 +8731,10 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
        if (!files)
                io_uring_try_cancel_requests(ctx, task, NULL);
 
-       if (did_park) {
+       if (task)
                atomic_dec(&task->io_uring->in_idle);
+       if (ctx->sq_data)
                io_sq_thread_unpark(ctx->sq_data);
-       }
 }
 
 /*
@@ -8786,10 +8762,6 @@ static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
                                fput(file);
                                return ret;
                        }
-
-                       /* one and only SQPOLL file note, held by sqo_task */
-                       WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) &&
-                                    current != ctx->sqo_task);
                }
                tctx->last = file;
        }
@@ -8819,13 +8791,17 @@ static void io_uring_del_task_file(struct file *file)
                fput(file);
 }
 
-static void io_uring_remove_task_files(struct io_uring_task *tctx)
+static void io_uring_clean_tctx(struct io_uring_task *tctx)
 {
        struct file *file;
        unsigned long index;
 
        xa_for_each(&tctx->xa, index, file)
                io_uring_del_task_file(file);
+       if (tctx->io_wq) {
+               io_wq_put_and_exit(tctx->io_wq);
+               tctx->io_wq = NULL;
+       }
 }
 
 void __io_uring_files_cancel(struct files_struct *files)
@@ -8840,13 +8816,8 @@ void __io_uring_files_cancel(struct files_struct *files)
                io_uring_cancel_task_requests(file->private_data, files);
        atomic_dec(&tctx->in_idle);
 
-       if (files) {
-               io_uring_remove_task_files(tctx);
-               if (tctx->io_wq) {
-                       io_wq_put(tctx->io_wq);
-                       tctx->io_wq = NULL;
-               }
-       }
+       if (files)
+               io_uring_clean_tctx(tctx);
 }
 
 static s64 tctx_inflight(struct io_uring_task *tctx)
@@ -8863,11 +8834,12 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 
        if (!sqd)
                return;
-       io_disable_sqo_submit(ctx);
-       if (!io_sq_thread_park(sqd))
+       io_sq_thread_park(sqd);
+       if (!sqd->thread || !sqd->thread->io_uring) {
+               io_sq_thread_unpark(sqd);
                return;
+       }
        tctx = ctx->sq_data->thread->io_uring;
-
        atomic_inc(&tctx->in_idle);
        do {
                /* read completions before cancelations */
@@ -8903,7 +8875,6 @@ void __io_uring_task_cancel(void)
        /* make sure overflow events are dropped */
        atomic_inc(&tctx->in_idle);
 
-       /* trigger io_disable_sqo_submit() */
        if (tctx->sqpoll) {
                struct file *file;
                unsigned long index;
@@ -8933,53 +8904,9 @@ void __io_uring_task_cancel(void)
 
        atomic_dec(&tctx->in_idle);
 
-       io_uring_remove_task_files(tctx);
-}
-
-static int io_uring_flush(struct file *file, void *data)
-{
-       struct io_uring_task *tctx = current->io_uring;
-       struct io_ring_ctx *ctx = file->private_data;
-
-       /* Ignore helper thread files exit */
-       if (current->flags & PF_IO_WORKER)
-               return 0;
-
-       if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
-               io_uring_cancel_task_requests(ctx, NULL);
-               io_req_caches_free(ctx, current);
-       }
-
-       io_run_ctx_fallback(ctx);
-
-       if (!tctx)
-               return 0;
-
-       /* we should have cancelled and erased it before PF_EXITING */
-       WARN_ON_ONCE((current->flags & PF_EXITING) &&
-                    xa_load(&tctx->xa, (unsigned long)file));
-
-       /*
-        * fput() is pending, will be 2 if the only other ref is our potential
-        * task file note. If the task is exiting, drop regardless of count.
-        */
-       if (atomic_long_read(&file->f_count) != 2)
-               return 0;
-
-       if (ctx->flags & IORING_SETUP_SQPOLL) {
-               /* there is only one file note, which is owned by sqo_task */
-               WARN_ON_ONCE(ctx->sqo_task != current &&
-                            xa_load(&tctx->xa, (unsigned long)file));
-               /* sqo_dead check is for when this happens after cancellation */
-               WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
-                            !xa_load(&tctx->xa, (unsigned long)file));
-
-               io_disable_sqo_submit(ctx);
-       }
-
-       if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
-               io_uring_del_task_file(file);
-       return 0;
+       io_uring_clean_tctx(tctx);
+       /* all of current's requests should be gone, we can kill tctx */
+       __io_uring_free(current);
 }
 
 static void *io_uring_validate_mmap_request(struct file *file,
@@ -9060,22 +8987,14 @@ static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
        do {
                if (!io_sqring_full(ctx))
                        break;
-
                prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
 
-               if (unlikely(ctx->sqo_dead)) {
-                       ret = -EOWNERDEAD;
-                       goto out;
-               }
-
                if (!io_sqring_full(ctx))
                        break;
-
                schedule();
        } while (!signal_pending(current));
 
        finish_wait(&ctx->sqo_sq_wait, &wait);
-out:
        return ret;
 }
 
@@ -9157,8 +9076,6 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                        ctx->sqo_exec = 0;
                }
                ret = -EOWNERDEAD;
-               if (unlikely(ctx->sqo_dead))
-                       goto out;
                if (flags & IORING_ENTER_SQ_WAKEUP)
                        wake_up(&ctx->sq_data->wait);
                if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9313,7 +9230,6 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 
 static const struct file_operations io_uring_fops = {
        .release        = io_uring_release,
-       .flush          = io_uring_flush,
        .mmap           = io_uring_mmap,
 #ifndef CONFIG_MMU
        .get_unmapped_area = io_uring_nommu_get_unmapped_area,
@@ -9468,7 +9384,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        ctx->compat = in_compat_syscall();
        if (!capable(CAP_IPC_LOCK))
                ctx->user = get_uid(current_user());
-       ctx->sqo_task = current;
 
        /*
         * This is just grabbed for accounting purposes. When a process exits,
@@ -9531,7 +9446,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
         */
        ret = io_uring_install_fd(ctx, file);
        if (ret < 0) {
-               io_disable_sqo_submit(ctx);
                /* fput will clean it up */
                fput(file);
                return ret;
@@ -9540,7 +9454,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
        return ret;
 err:
-       io_disable_sqo_submit(ctx);
        io_ring_ctx_wait_and_kill(ctx);
        return ret;
 }
@@ -9708,10 +9621,7 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
        if (ctx->restrictions.registered)
                ctx->restricted = 1;
 
-       ctx->flags &= ~IORING_SETUP_R_DISABLED;
-
        io_sq_offload_start(ctx);
-
        return 0;
 }
 
index 93a217e..14658b0 100644 (file)
@@ -467,7 +467,7 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type,
 static void pstore_kill_sb(struct super_block *sb)
 {
        mutex_lock(&pstore_sb_lock);
-       WARN_ON(pstore_sb != sb);
+       WARN_ON(pstore_sb && pstore_sb != sb);
 
        kill_litter_super(sb);
        pstore_sb = NULL;
index aa8e0b6..fff363b 100644 (file)
@@ -246,7 +246,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
                pr_info("error in header, %d\n", numerr);
                prz->corrected_bytes += numerr;
        } else if (numerr < 0) {
-               pr_info("uncorrectable error in header\n");
+               pr_info_ratelimited("uncorrectable error in header\n");
                prz->bad_blocks++;
        }
 
index 3c5757d..9f43241 100644 (file)
@@ -746,12 +746,12 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
 
 static inline void acpi_dev_put(struct acpi_device *adev) {}
 
-static inline bool is_acpi_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
 
-static inline bool is_acpi_device_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
@@ -761,7 +761,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno
        return NULL;
 }
 
-static inline bool is_acpi_data_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
index c032cfe..bc6bc83 100644 (file)
@@ -65,8 +65,6 @@ typedef void (rq_end_io_fn)(struct request *, blk_status_t);
  * request flags */
 typedef __u32 __bitwise req_flags_t;
 
-/* elevator knows about this request */
-#define RQF_SORTED             ((__force req_flags_t)(1 << 0))
 /* drive already may have started this one */
 #define RQF_STARTED            ((__force req_flags_t)(1 << 1))
 /* may not be passed by ioscheduler */
index d2c70d3..44170f3 100644 (file)
@@ -276,4 +276,60 @@ static inline void copy_highpage(struct page *to, struct page *from)
 
 #endif
 
+static inline void memcpy_page(struct page *dst_page, size_t dst_off,
+                              struct page *src_page, size_t src_off,
+                              size_t len)
+{
+       char *dst = kmap_local_page(dst_page);
+       char *src = kmap_local_page(src_page);
+
+       VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+       memcpy(dst + dst_off, src + src_off, len);
+       kunmap_local(src);
+       kunmap_local(dst);
+}
+
+static inline void memmove_page(struct page *dst_page, size_t dst_off,
+                              struct page *src_page, size_t src_off,
+                              size_t len)
+{
+       char *dst = kmap_local_page(dst_page);
+       char *src = kmap_local_page(src_page);
+
+       VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+       memmove(dst + dst_off, src + src_off, len);
+       kunmap_local(src);
+       kunmap_local(dst);
+}
+
+static inline void memset_page(struct page *page, size_t offset, int val,
+                              size_t len)
+{
+       char *addr = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memset(addr + offset, val, len);
+       kunmap_local(addr);
+}
+
+static inline void memcpy_from_page(char *to, struct page *page,
+                                   size_t offset, size_t len)
+{
+       char *from = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memcpy(to, from + offset, len);
+       kunmap_local(from);
+}
+
+static inline void memcpy_to_page(struct page *page, size_t offset,
+                                 const char *from, size_t len)
+{
+       char *to = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memcpy(to + offset, from, len);
+       kunmap_local(to);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
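
A minimal usage sketch (not part of this patch; the function name is
illustrative): each new helper maps the page with kmap_local_page(), copies,
and unmaps, so callers no longer open-code that sequence:

	/* hypothetical round-trip through a freshly allocated page */
	static int highmem_copy_demo(void)
	{
		struct page *page = alloc_page(GFP_KERNEL);
		char buf[8];

		if (!page)
			return -ENOMEM;
		memcpy_to_page(page, 0, "abcdefg", 8);	/* map, copy in, unmap */
		memcpy_from_page(buf, page, 0, 8);	/* map, copy out, unmap */
		__free_page(page);
		return memcmp(buf, "abcdefg", 8) ? -EIO : 0;
	}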
index 51ede77..7cb7bd0 100644 (file)
@@ -38,7 +38,7 @@ void __io_uring_free(struct task_struct *tsk);
 
 static inline void io_uring_task_cancel(void)
 {
-       if (current->io_uring && !xa_empty(&current->io_uring->xa))
+       if (current->io_uring)
                __io_uring_task_cancel();
 }
 static inline void io_uring_files_cancel(struct files_struct *files)
index c0f71f2..ef02be8 100644 (file)
@@ -31,6 +31,7 @@ struct kernel_clone_args {
        /* Number of elements in *set_tid */
        size_t set_tid_size;
        int cgroup;
+       int io_thread;
        struct cgroup *cgrp;
        struct css_set *cset;
 };
@@ -82,6 +83,7 @@ extern void exit_files(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
index 120ffdd..3a5446a 100644 (file)
@@ -187,4 +187,6 @@ void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable);
 
 irqreturn_t sdw_intel_thread(int irq, void *dev_id);
 
+#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE      BIT(1)
+
 #endif
index 32f665b..4cc6ec3 100644 (file)
@@ -485,6 +485,7 @@ struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
 extern void exit_swap_address_space(unsigned int type);
 extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
+sector_t swap_page_sector(struct page *page);
 
 static inline void put_swap_device(struct swap_info_struct *si)
 {
index 7077fec..28e7af1 100644 (file)
@@ -349,15 +349,8 @@ struct trace_event_call {
        struct event_filter     *filter;
        void                    *mod;
        void                    *data;
-       /*
-        *   bit 0:             filter_active
-        *   bit 1:             allow trace by non root (cap any)
-        *   bit 2:             failed to apply filter
-        *   bit 3:             trace internal event (do not enable)
-        *   bit 4:             Event was enabled by module
-        *   bit 5:             use call filter rather than file filter
-        *   bit 6:             Event is a tracepoint
-        */
+
+       /* See the TRACE_EVENT_FL_* flags above */
        int                     flags; /* static flags of different events */
 
 #ifdef CONFIG_PERF_EVENTS
index 743c2f4..d057480 100644 (file)
@@ -112,6 +112,11 @@ struct nhlt_vendor_dmic_array_config {
        /* TODO add vendor mic config */
 } __packed;
 
+enum {
+       NHLT_CONFIG_TYPE_GENERIC = 0,
+       NHLT_CONFIG_TYPE_MIC_ARRAY = 1
+};
+
 enum {
        NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
        NHLT_MIC_ARRAY_2CH_BIG = 0xb,
index 9a43c44..c450750 100644 (file)
@@ -174,7 +174,7 @@ struct snd_soc_acpi_codecs {
 static inline bool snd_soc_acpi_sof_parent(struct device *dev)
 {
        return dev->parent && dev->parent->driver && dev->parent->driver->name &&
-               !strcmp(dev->parent->driver->name, "sof-audio-acpi");
+               !strncmp(dev->parent->driver->name, "sof-audio-acpi", strlen("sof-audio-acpi"));
 }
 
 #endif
index 8b281f7..f6afee2 100644 (file)
@@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
 
 struct kvm_xen_hvm_config {
        __u32 flags;
@@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
        union {
                __u64 gpa;
                __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
 };
 
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
index d66cd10..d3171e8 100644 (file)
@@ -1940,6 +1940,8 @@ static __latent_entropy struct task_struct *copy_process(
        p = dup_task_struct(current, node);
        if (!p)
                goto fork_out;
+       if (args->io_thread)
+               p->flags |= PF_IO_WORKER;
 
        /*
         * This _must_ happen before we call free_task(), i.e. before we jump
@@ -2410,6 +2412,34 @@ struct mm_struct *copy_init_mm(void)
        return dup_mm(NULL, &init_mm);
 }
 
+/*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+ * The returned task is inactive, and the caller must fire it up through
+ * wake_up_new_task(p). All signals are blocked in the created task.
+ */
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+{
+       unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+                               CLONE_IO;
+       struct kernel_clone_args args = {
+               .flags          = ((lower_32_bits(flags) | CLONE_VM |
+                                   CLONE_UNTRACED) & ~CSIGNAL),
+               .exit_signal    = (lower_32_bits(flags) & CSIGNAL),
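+               /* for kernel threads, stack/stack_size carry fn and its arg */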
+               .stack          = (unsigned long)fn,
+               .stack_size     = (unsigned long)arg,
+               .io_thread      = 1,
+       };
+       struct task_struct *tsk;
+
+       tsk = copy_process(NULL, 0, node, &args);
+       if (!IS_ERR(tsk)) {
+               sigfillset(&tsk->blocked);
+               sigdelsetmask(&tsk->blocked, sigmask(SIGKILL));
+       }
+       return tsk;
+}
+
 /*
  *  Ok, this is the main fork-routine.
  *
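
A sketch of the intended call pattern for create_io_thread() (mirroring the
io_uring conversions in this series; the wrapper name is hypothetical). The
new task comes back inactive, with all signals except SIGKILL blocked, so the
caller must wake it explicitly:

	static struct task_struct *start_io_thread(int (*fn)(void *), void *arg)
	{
		struct task_struct *tsk;

		tsk = create_io_thread(fn, arg, NUMA_NO_NODE);
		if (IS_ERR(tsk))
			return tsk;
		/* any per-task setup goes here, before the thread runs */
		wake_up_new_task(tsk);
		return tsk;
	}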
index 9c266b9..7fa8277 100644 (file)
@@ -694,7 +694,7 @@ config TRACEPOINT_BENCHMARK
        help
         This option creates the tracepoint "benchmark:benchmark_event".
         When the tracepoint is enabled, it kicks off a kernel thread that
-        goes into an infinite loop (calling cond_sched() to let other tasks
+        goes into an infinite loop (calling cond_resched() to let other tasks
         run), and calls the tracepoint. Each iteration will record the time
         it took to write to the tracepoint and the next iteration that
         data will be passed to the tracepoint itself. That is, the tracepoint
index b9dad35..68744c5 100644 (file)
@@ -2814,6 +2814,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
                                       write_stamp, write_stamp - delta))
                        return 0;
 
+               /*
+                * It's possible that the event time delta is zero
+                * (has the same time stamp as the previous event)
+                * in which case write_stamp and before_stamp could
+                * be the same. In such a case, force before_stamp
+                * to be different from write_stamp. It doesn't
+                * matter what it is, as long as it's different.
+                */
+               if (!delta)
+                       rb_time_set(&cpu_buffer->before_stamp, 0);
+
                /*
                 * If an event were to come in now, it would see that the
                 * write_stamp and the before_stamp are different, and assume
@@ -3307,9 +3318,13 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
                        goto out;
                }
                atomic_inc(&cpu_buffer->record_disabled);
-               pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n",
-                      cpu_buffer->cpu,
-                      ts + info->delta, info->ts, info->delta, info->after);
+               /* There are some cases during boot-up where this can happen */
+               WARN_ON_ONCE(system_state != SYSTEM_BOOTING);
+               pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n",
+                       cpu_buffer->cpu,
+                       ts + info->delta, info->ts, info->delta,
+                       info->before, info->after,
+                       full ? " (full)" : "");
                dump_buffer_page(bpage, info, tail);
                atomic_dec(&ts_dump);
                /* Do not re-enable checking */
index e295c41..eccb4e1 100644 (file)
@@ -1929,6 +1929,12 @@ static int run_tracer_selftest(struct tracer *type)
        if (!selftests_can_run)
                return save_selftest(type);
 
+       if (!tracing_is_on()) {
+               pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
+                       type->name);
+               return 0;
+       }
+
        /*
         * Run a selftest on this tracer.
         * Here we reset the trace buffer, and set the current
index dec13ff..a6446c0 100644 (file)
@@ -605,7 +605,6 @@ void trace_graph_function(struct trace_array *tr,
 void trace_latency_header(struct seq_file *m);
 void trace_default_header(struct seq_file *m);
 void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-int trace_empty(struct trace_iterator *iter);
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
 int trace_graph_entry(struct ftrace_graph_ent *trace);
index 2979a96..8d71e6c 100644 (file)
@@ -1225,8 +1225,10 @@ static int __create_synth_event(const char *name, const char *raw_fields)
                        goto err;
                }
 
-               if (!argc)
+               if (!argc) {
+                       argv_free(argv);
                        continue;
+               }
 
                n_fields_this_loop = 0;
                consumed = 0;
index d8ca336..f66c62a 100644 (file)
@@ -464,20 +464,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
-{
-       char *from = kmap_atomic(page);
-       memcpy(to, from + offset, len);
-       kunmap_atomic(from);
-}
-
-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
-{
-       char *to = kmap_atomic(page);
-       memcpy(to + offset, from, len);
-       kunmap_atomic(to);
-}
-
 static void memzero_page(struct page *page, size_t offset, size_t len)
 {
        char *addr = kmap_atomic(page);
index 485fa5c..c493ce9 100644 (file)
@@ -254,11 +254,6 @@ out:
        return ret;
 }
 
-static sector_t swap_page_sector(struct page *page)
-{
-       return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
-}
-
 static inline void count_swpout_vm_event(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
index f039745..084a5b9 100644 (file)
@@ -219,6 +219,19 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
        BUG();
 }
 
+sector_t swap_page_sector(struct page *page)
+{
+       struct swap_info_struct *sis = page_swap_info(page);
+       struct swap_extent *se;
+       sector_t sector;
+       pgoff_t offset;
+
+       offset = __page_file_index(page);
+       se = offset_to_swap_extent(sis, offset);
+       sector = se->start_block + (offset - se->start_page);
+       return sector << (PAGE_SHIFT - 9);
+}
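+
+/*
+ * Worked example with hypothetical numbers (4K pages, so PAGE_SHIFT - 9 == 3):
+ * a page at swap offset 5, in an extent with start_page == 3 and
+ * start_block == 100, maps to block 100 + (5 - 3) == 102 and is returned
+ * as sector 102 << 3 == 816 in 512-byte units.
+ */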
+
 /*
  * swap allocation tells the device that a cluster of swap can now be discarded,
  * to allow the swap device to optimize its wear-levelling.
diff --git a/scripts/dtc/include-prefixes/c6x b/scripts/dtc/include-prefixes/c6x
deleted file mode 120000 (symlink)
index 49ded4c..0000000
+++ /dev/null
@@ -1 +0,0 @@
-../../../arch/c6x/boot/dts
\ No newline at end of file
index 9dced66..589454b 100644 (file)
@@ -524,7 +524,7 @@ static unsigned int latent_entropy_execute(void)
        while (bb != EXIT_BLOCK_PTR_FOR_FN(cfun)) {
                perturb_local_entropy(bb, local_entropy);
                bb = bb->next_bb;
-       };
+       }
 
        /* 4. mix local entropy into the global entropy variable */
        perturb_latent_entropy(local_entropy);
index 29b480c..d7190e4 100644
@@ -170,7 +170,6 @@ static void initialize(tree var)
 static unsigned int structleak_execute(void)
 {
        basic_block bb;
-       unsigned int ret = 0;
        tree var;
        unsigned int i;
 
@@ -200,7 +199,7 @@ static unsigned int structleak_execute(void)
                        initialize(var);
        }
 
-       return ret;
+       return 0;
 }
 
 #define PASS_NAME structleak
index b9c2ee7..cce12e1 100644
@@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp)
 
 static int arm64_is_fake_mcount(Elf64_Rel const *rp)
 {
-       return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26;
+       return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26;
 }
 
 /* 64-bit EM_MIPS has weird ELF64_Rela.r_info.
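
The one-character change above is a field-width fix: Elf64_Rel.r_info is 64 bits wide, and recordmcount's w() byte-swap helper handles only 32, so when the build host and the object file differ in endianness the truncate-then-swap mangled the value and arm64 relocations were misclassified. A standalone demonstration of why the width matters (the real w()/w8() become identity functions when no swap is needed):

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t w(uint32_t x)  { return __builtin_bswap32(x); }
  static uint64_t w8(uint64_t x) { return __builtin_bswap64(x); }

  int main(void)
  {
          /* Elf64_Rel.r_info packs (symbol << 32) | type;
           * 283 is R_AARCH64_CALL26.
           */
          uint64_t host   = ((uint64_t)7 << 32) | 283;
          uint64_t stored = __builtin_bswap64(host);  /* cross-endian file */

          printf("w8: type = %llu\n",   /* 283, as intended */
                 (unsigned long long)(w8(stored) & 0xffffffff));
          printf("w:  type = %u\n",     /* 7: the symbol index, not the type */
                 w((uint32_t)stored));
          return 0;
  }
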
index 9ed5cfa..57595f1 100644
@@ -44,9 +44,13 @@ config SND_INTEL_NHLT
 config SND_INTEL_DSP_CONFIG
        tristate
        select SND_INTEL_NHLT if ACPI
+       select SND_INTEL_SOUNDWIRE_ACPI if ACPI
        # this config should be selected only for Intel DSP platforms.
        # A fallback is provided so that the code compiles in all cases.
 
+config SND_INTEL_SOUNDWIRE_ACPI
+       tristate
+
 config SND_INTEL_BYT_PREFER_SOF
        bool "Prefer SOF driver over SST on BY/CHT platforms"
        depends on SND_SST_ATOM_HIFI2_PLATFORM_ACPI && SND_SOC_SOF_BAYTRAIL
index 601e617..78f487a 100644
@@ -17,3 +17,6 @@ obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/
 snd-intel-dspcfg-objs := intel-dsp-config.o
 snd-intel-dspcfg-$(CONFIG_SND_INTEL_NHLT) += intel-nhlt.o
 obj-$(CONFIG_SND_INTEL_DSP_CONFIG) += snd-intel-dspcfg.o
+
+snd-intel-sdw-acpi-objs := intel-sdw-acpi.o
+obj-$(CONFIG_SND_INTEL_SOUNDWIRE_ACPI) += snd-intel-sdw-acpi.o
index a9bd39b..b2df7b4 100644
@@ -133,7 +133,7 @@ void snd_hdac_link_free_all(struct hdac_bus *bus)
 EXPORT_SYMBOL_GPL(snd_hdac_link_free_all);
 
 /**
- * snd_hdac_ext_bus_get_link_index - get link based on codec name
+ * snd_hdac_ext_bus_get_link - get link based on codec name
  * @bus: the pointer to HDAC bus object
  * @codec_name: codec name
  */
index c4d54a8..0c005d6 100644
@@ -133,7 +133,7 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple);
 
 /**
- * snd_hdac_ext_linkstream_start - start a stream
+ * snd_hdac_ext_link_stream_start - start a stream
  * @stream: HD-audio ext core stream to start
  */
 void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream)
index d75f31e..fe35875 100644
@@ -386,7 +386,7 @@ int snd_hdac_regmap_init(struct hdac_device *codec)
 EXPORT_SYMBOL_GPL(snd_hdac_regmap_init);
 
 /**
- * snd_hdac_regmap_init - Release the regmap from HDA codec
+ * snd_hdac_regmap_exit - Release the regmap from HDA codec
  * @codec: the codec object
  */
 void snd_hdac_regmap_exit(struct hdac_device *codec)
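
This hunk and the two above it (plus several below in sound/pci/hda) are the same class of W=1 cleanup: the identifier on the first line of a kernel-doc comment must match the function it documents, or scripts/kernel-doc emits a warning. The expected shape:

  /**
   * snd_hdac_regmap_exit - Release the regmap from HDA codec
   * @codec: the codec object
   *
   * The name after the opening marker must be the documented function's
   * name; `make W=1` runs scripts/kernel-doc and warns on any mismatch.
   */
  void snd_hdac_regmap_exit(struct hdac_device *codec);
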
index d1eb9d3..ab5ff78 100644
@@ -557,4 +557,4 @@ EXPORT_SYMBOL_GPL(snd_intel_acpi_dsp_driver_probe);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Intel DSP config driver");
-MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
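
The rename tracks the namespace move in this series: the ACPI helper that snd-intel-dspcfg consumes is now exported from sound/hda (intel-sdw-acpi.c, added below) under SND_INTEL_SOUNDWIRE_ACPI instead of from drivers/soundwire under SOUNDWIRE_INTEL_INIT. Symbols exported with EXPORT_SYMBOL_NS() live in a named namespace, and a consumer must declare a matching MODULE_IMPORT_NS() or modpost rejects the link. A minimal sketch of the pair, with a hypothetical symbol:

  #include <linux/module.h>

  /* exporter, e.g. snd-intel-sdw-acpi */
  int sdw_helper(void);                   /* hypothetical symbol */
  EXPORT_SYMBOL_NS(sdw_helper, SND_INTEL_SOUNDWIRE_ACPI);

  /* consumer, e.g. snd-intel-dspcfg: without this declaration modpost
   * fails the build for using a symbol from a namespace the module
   * never imported.
   */
  MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
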
index 059aaf0..d053bec 100644
@@ -31,18 +31,44 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
        struct nhlt_endpoint *epnt;
        struct nhlt_dmic_array_config *cfg;
        struct nhlt_vendor_dmic_array_config *cfg_vendor;
+       struct nhlt_fmt *fmt_configs;
        unsigned int dmic_geo = 0;
-       u8 j;
+       u16 max_ch = 0;
+       u8 i, j;
 
        if (!nhlt)
                return 0;
 
-       epnt = (struct nhlt_endpoint *)nhlt->desc;
+       for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++,
+            epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) {
 
-       for (j = 0; j < nhlt->endpoint_count; j++) {
-               if (epnt->linktype == NHLT_LINK_DMIC) {
-                       cfg = (struct nhlt_dmic_array_config  *)
-                                       (epnt->config.caps);
+               if (epnt->linktype != NHLT_LINK_DMIC)
+                       continue;
+
+               cfg = (struct nhlt_dmic_array_config  *)(epnt->config.caps);
+               fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size);
+
+               /* find max number of channels based on format_configuration */
+               if (fmt_configs->fmt_count) {
+                       dev_dbg(dev, "%s: found %d format definitions\n",
+                               __func__, fmt_configs->fmt_count);
+
+                       for (i = 0; i < fmt_configs->fmt_count; i++) {
+                               struct wav_fmt_ext *fmt_ext;
+
+                               fmt_ext = &fmt_configs->fmt_config[i].fmt_ext;
+
+                               if (fmt_ext->fmt.channels > max_ch)
+                                       max_ch = fmt_ext->fmt.channels;
+                       }
+                       dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
+               } else {
+                       dev_dbg(dev, "%s: No format information found\n", __func__);
+               }
+
+               if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
+                       dmic_geo = max_ch;
+               } else {
                        switch (cfg->array_type) {
                        case NHLT_MIC_ARRAY_2CH_SMALL:
                        case NHLT_MIC_ARRAY_2CH_BIG:
@@ -59,13 +85,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
                                dmic_geo = cfg_vendor->nb_mics;
                                break;
                        default:
-                               dev_warn(dev, "undefined DMIC array_type 0x%0x\n",
-                                        cfg->array_type);
+                               dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n",
+                                        __func__, cfg->array_type);
+                       }
+
+                       if (dmic_geo > 0) {
+                               dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
+                       }
+                       if (max_ch > dmic_geo) {
+                               dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
+                                       __func__, max_ch, dmic_geo);
                        }
                }
-               epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
        }
 
+       dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
+               __func__, dmic_geo, max_ch);
+
        return dmic_geo;
 }
 EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo);
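
Two things change in intel_nhlt_get_dmic_geo(): endpoints whose configuration is not a mic array now derive their channel count from the trailing format descriptors instead of falling through the array-type switch, and the endpoint advance moved into the for-statement so the new early `continue` cannot skip it. The latter is the standard walk over self-sized table entries, sketched with a simplified record type:

  #include <stdint.h>
  #include <stddef.h>

  /* Simplified stand-in for struct nhlt_endpoint: each record carries
   * its own total length, so iteration means "advance by r->length".
   */
  struct record {
          uint32_t length;    /* bytes, header included */
          uint8_t  linktype;  /* 2 == NHLT_LINK_DMIC in the kernel enum */
  };

  static void walk(const uint8_t *buf, size_t count)
  {
          const struct record *r = (const struct record *)buf;
          size_t i;

          for (i = 0; i < count; i++,
               r = (const struct record *)((const uint8_t *)r + r->length)) {
                  if (r->linktype != 2)
                          continue;  /* safe: the for-step still advances r */
                  /* ... inspect the DMIC payload ... */
          }
  }
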
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
new file mode 100644
index 0000000..c0123bc
--- /dev/null
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2015-2021 Intel Corporation.
+
+/*
+ * SDW Intel ACPI scan helpers
+ */
+
+#include <linux/acpi.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fwnode.h>
+#include <linux/module.h>
+#include <linux/soundwire/sdw_intel.h>
+#include <linux/string.h>
+
+#define SDW_LINK_TYPE          4 /* from Intel ACPI documentation */
+#define SDW_MAX_LINKS          4
+
+static int ctrl_link_mask;
+module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
+MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
+
+static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
+{
+       struct fwnode_handle *link;
+       char name[32];
+       u32 quirk_mask = 0;
+
+       /* Find master handle */
+       snprintf(name, sizeof(name),
+                "mipi-sdw-link-%d-subproperties", i);
+
+       link = fwnode_get_named_child_node(fw_node, name);
+       if (!link)
+               return false;
+
+       fwnode_property_read_u32(link,
+                                "intel-quirk-mask",
+                                &quirk_mask);
+
+       if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+               return false;
+
+       return true;
+}
+
+static int
+sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
+{
+       struct acpi_device *adev;
+       int ret, i;
+       u8 count;
+
+       if (acpi_bus_get_device(info->handle, &adev))
+               return -EINVAL;
+
+       /* Found controller, find links supported */
+       count = 0;
+       ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
+                                           "mipi-sdw-master-count", &count, 1);
+
+       /*
+        * In theory we could check the number of links supported in
+        * hardware, but in that step we cannot assume SoundWire IP is
+        * powered.
+        *
+        * In addition, if the BIOS doesn't even provide this
+        * 'master-count' property then all the inits based on link
+        * masks will fail as well.
+        *
+        * We will check the hardware capabilities in the startup() step
+        */
+
+       if (ret) {
+               dev_err(&adev->dev,
+                       "Failed to read mipi-sdw-master-count: %d\n", ret);
+               return -EINVAL;
+       }
+
+       /* Check count is within bounds */
+       if (count > SDW_MAX_LINKS) {
+               dev_err(&adev->dev, "Link count %d exceeds max %d\n",
+                       count, SDW_MAX_LINKS);
+               return -EINVAL;
+       }
+
+       if (!count) {
+               dev_warn(&adev->dev, "No SoundWire links detected\n");
+               return -EINVAL;
+       }
+       dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
+
+       info->count = count;
+       info->link_mask = 0;
+
+       for (i = 0; i < count; i++) {
+               if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
+                       dev_dbg(&adev->dev,
+                               "Link %d masked, will not be enabled\n", i);
+                       continue;
+               }
+
+               if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
+                       dev_dbg(&adev->dev,
+                               "Link %d not selected in firmware\n", i);
+                       continue;
+               }
+
+               info->link_mask |= BIT(i);
+       }
+
+       return 0;
+}
+
+static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
+                                    void *cdata, void **return_value)
+{
+       struct sdw_intel_acpi_info *info = cdata;
+       struct acpi_device *adev;
+       acpi_status status;
+       u64 adr;
+
+       status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
+       if (ACPI_FAILURE(status))
+               return AE_OK; /* keep going */
+
+       if (acpi_bus_get_device(handle, &adev)) {
+               pr_err("%s: Couldn't find ACPI handle\n", __func__);
+               return AE_NOT_FOUND;
+       }
+
+       info->handle = handle;
+
+       /*
+        * On some Intel platforms, multiple children of the HDAS
+        * device can be found, but only one of them is the SoundWire
+        * controller. The SNDW device is always exposed with
+        * Name(_ADR, 0x40000000), with bits 31..28 representing the
+        * SoundWire link so filter accordingly
+        */
+       if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
+               return AE_OK; /* keep going */
+
+       /* device found, stop namespace walk */
+       return AE_CTRL_TERMINATE;
+}
+
+/**
+ * sdw_intel_acpi_scan() - SoundWire Intel init routine
+ * @parent_handle: ACPI parent handle
+ * @info: description of what firmware/DSDT tables expose
+ *
+ * This scans the namespace and queries firmware to figure out which
+ * links to enable. A follow-up use of sdw_intel_probe() and
+ * sdw_intel_startup() is required for creation of devices and bus
+ * startup
+ */
+int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+                       struct sdw_intel_acpi_info *info)
+{
+       acpi_status status;
+
+       info->handle = NULL;
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
+                                    parent_handle, 1,
+                                    sdw_intel_acpi_cb,
+                                    NULL, info, NULL);
+       if (ACPI_FAILURE(status) || info->handle == NULL)
+               return -ENODEV;
+
+       return sdw_intel_scan_controller(info);
+}
+EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SND_INTEL_SOUNDWIRE_ACPI);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Intel Soundwire ACPI helpers");
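
This new file is the ACPI-scan half extracted from drivers/soundwire/intel_init.c (compare the drivers/soundwire changes earlier in this merge): it walks the namespace and reads DSDT properties but touches no SoundWire hardware, which is what lets snd-intel-dspcfg link against it without pulling in the whole bus driver. A hypothetical caller:

  #include <linux/acpi.h>
  #include <linux/soundwire/sdw_intel.h>

  /* Sketch: probe-time link discovery, where parent_handle is the ACPI
   * handle of the audio controller (names here are illustrative).
   */
  static int discover_sdw_links(acpi_handle parent_handle)
  {
          struct sdw_intel_acpi_info info;
          int ret;

          ret = sdw_intel_acpi_scan(&parent_handle, &info);
          if (ret < 0)
                  return ret;  /* no SoundWire controller exposed */

          pr_info("%d links reported, usable mask %#x\n",
                  info.count, info.link_mask);
          return 0;
  }
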
index ca6b4b9..e35e931 100644
@@ -312,14 +312,14 @@ static int __init n64audio_probe(struct platform_device *pdev)
        }
 
        priv->mi_reg_base = devm_platform_ioremap_resource(pdev, 0);
-       if (!priv->mi_reg_base) {
-               err = -EINVAL;
+       if (IS_ERR(priv->mi_reg_base)) {
+               err = PTR_ERR(priv->mi_reg_base);
                goto fail_dma_alloc;
        }
 
        priv->ai_reg_base = devm_platform_ioremap_resource(pdev, 1);
-       if (!priv->ai_reg_base) {
-               err = -EINVAL;
+       if (IS_ERR(priv->ai_reg_base)) {
+               err = PTR_ERR(priv->ai_reg_base);
                goto fail_dma_alloc;
        }
 
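
The n64audio change fixes a frequent API mix-up: devm_platform_ioremap_resource() never returns NULL on failure, it returns an ERR_PTR-encoded errno, so the old NULL test both missed every failure and then invented -EINVAL instead of propagating the real code. The idiom, as a small sketch:

  #include <linux/err.h>
  #include <linux/platform_device.h>

  /* ERR_PTR-returning APIs are tested with IS_ERR() and decoded with
   * PTR_ERR(); comparing against NULL silently accepts error pointers.
   */
  static int map_regs(struct platform_device *pdev, void __iomem **out)
  {
          void __iomem *base = devm_platform_ioremap_resource(pdev, 0);

          if (IS_ERR(base))
                  return PTR_ERR(base);  /* propagate -EBUSY, -ENOMEM, ... */

          *out = base;
          return 0;
  }
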
index a855fb8..55af8ef 100644
@@ -991,7 +991,7 @@ static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
 
        if (idx < 4) {
                /* S/PDIF output */
-               switch ((conf & 0x7)) {
+               switch ((conf & 0xf)) {
                case 1:
                        set_field(&ctl->txctl[idx], ATXCTL_NUC, 0);
                        break;
index 9b75506..2026f1c 100644
@@ -3483,7 +3483,7 @@ EXPORT_SYMBOL_GPL(snd_hda_check_amp_list_power);
  */
 
 /**
- * snd_hda_input_mux_info_info - Info callback helper for the input-mux enum
+ * snd_hda_input_mux_info - Info callback helper for the input-mux enum
  * @imux: imux helper object
  * @uinfo: pointer to get/store the data
  */
@@ -3506,7 +3506,7 @@ int snd_hda_input_mux_info(const struct hda_input_mux *imux,
 EXPORT_SYMBOL_GPL(snd_hda_input_mux_info);
 
 /**
- * snd_hda_input_mux_info_put - Put callback helper for the input-mux enum
+ * snd_hda_input_mux_put - Put callback helper for the input-mux enum
  * @codec: the HDA codec
  * @imux: imux helper object
  * @ucontrol: pointer to get/store the data
@@ -3941,7 +3941,7 @@ unsigned int snd_hda_correct_pin_ctl(struct hda_codec *codec,
 EXPORT_SYMBOL_GPL(snd_hda_correct_pin_ctl);
 
 /**
- * _snd_hda_pin_ctl - Helper to set pin ctl value
+ * _snd_hda_set_pin_ctl - Helper to set pin ctl value
  * @codec: the HDA codec
  * @pin: referred pin NID
  * @val: pin control value to set
index 5e40944..8b7c550 100644
@@ -3923,7 +3923,7 @@ static void vmaster_update_mute_led(void *private_data, int enabled)
 }
 
 /**
- * snd_dha_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
+ * snd_hda_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
  * @codec: the HDA codec
  * @callback: the callback for LED classdev brightness_set_blocking
  */
@@ -4074,7 +4074,7 @@ static int add_micmute_led_hook(struct hda_codec *codec)
 }
 
 /**
- * snd_dha_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
+ * snd_hda_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
  * @codec: the HDA codec
  * @callback: the callback for LED classdev brightness_set_blocking
  *
index b8b5680..ac00866 100644
@@ -213,7 +213,7 @@ static void jack_detect_update(struct hda_codec *codec,
 }
 
 /**
- * snd_hda_set_dirty_all - Mark all the cached as dirty
+ * snd_hda_jack_set_dirty_all - Mark all the cached as dirty
  * @codec: the HDA codec
  *
  * This function sets the dirty flag to all entries of jack table.
@@ -293,7 +293,7 @@ find_callback_from_list(struct hda_jack_tbl *jack,
 }
 
 /**
- * snd_hda_jack_detect_enable_mst - enable the jack-detection
+ * snd_hda_jack_detect_enable_callback_mst - enable the jack-detection
  * @codec: the HDA codec
  * @nid: pin NID to enable
  * @func: callback function to register
index 7e62aed..c966f49 100644
@@ -2338,7 +2338,7 @@ static int dspio_send_scp_message(struct hda_codec *codec,
 }
 
 /**
- * Prepare and send the SCP message to DSP
+ * dspio_scp - Prepare and send the SCP message to DSP
  * @codec: the HDA codec
  * @mod_id: ID of the DSP module to send the command
  * @src_id: ID of the source
@@ -2865,7 +2865,7 @@ static int dsp_dma_stop(struct hda_codec *codec,
 }
 
 /**
- * Allocate router ports
+ * dsp_allocate_router_ports - Allocate router ports
  *
  * @codec: the HDA codec
  * @num_chans: number of channels in the stream
@@ -3178,8 +3178,7 @@ static int dspxfr_hci_write(struct hda_codec *codec,
 }
 
 /**
- * Write a block of data into DSP code or data RAM using pre-allocated
- * DMA engine.
+ * dspxfr_one_seg - Write a block of data into DSP code or data RAM using pre-allocated DMA engine.
  *
  * @codec: the HDA codec
  * @fls: pointer to a fast load image
@@ -3376,7 +3375,7 @@ static int dspxfr_one_seg(struct hda_codec *codec,
 }
 
 /**
- * Write the entire DSP image of a DSP code/data overlay to DSP memories
+ * dspxfr_image - Write the entire DSP image of a DSP code/data overlay to DSP memories
  *
  * @codec: the HDA codec
  * @fls_data: pointer to a fast load image
index e405be7..e6d0843 100644
@@ -157,6 +157,7 @@ struct hdmi_spec {
 
        bool dyn_pin_out;
        bool dyn_pcm_assign;
+       bool dyn_pcm_no_legacy;
        bool intel_hsw_fixup;   /* apply Intel platform-specific fixups */
        /*
         * Non-generic VIA/NVIDIA specific
@@ -1345,6 +1346,12 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
 {
        int i;
 
+       /* on the new machines, try to assign the pcm slot dynamically
+        * rather than using the preferred fixed map (the legacy way).
+        */
+       if (spec->dyn_pcm_no_legacy)
+               goto last_try;
+
        /*
         * generic_hdmi_build_pcms() may allocate extra PCMs on some
         * platforms (with maximum of 'num_nids + dev_num - 1')
@@ -1374,6 +1381,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
                        return i;
        }
 
+ last_try:
        /* the last try; check the empty slots in pins */
        for (i = 0; i < spec->num_nids; i++) {
                if (!test_bit(i, &spec->pcm_bitmap))
@@ -2987,8 +2995,16 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec)
         * the index indicate the port number.
         */
        static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
+       int ret;
 
-       return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+       ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+       if (!ret) {
+               struct hdmi_spec *spec = codec->spec;
+
+               spec->dyn_pcm_no_legacy = true;
+       }
+
+       return ret;
 }
 
 /* Intel Baytrail and Braswell; with eld notifier */
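
The new dyn_pcm_no_legacy flag, set for the Intel Tigerlake codec in the hunk just above, makes hdmi_find_pcm_slot() jump straight to the final fallback instead of consulting the preferred fixed pin-to-PCM map, which no longer matches how these parts number their pins. That fallback is plain first-free-bit allocation over the slot bitmap, roughly:

  #include <linux/bitmap.h>

  /* Sketch of the last-try path: hand out the first PCM slot whose bit
   * in the usage bitmap is still clear (the caller sets it on binding).
   */
  static int first_free_pcm_slot(const unsigned long *pcm_bitmap, int num_nids)
  {
          int i;

          for (i = 0; i < num_nids; i++)
                  if (!test_bit(i, pcm_bitmap))
                          return i;
          return -EBUSY;
  }
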
index 1927605..b47504f 100644
@@ -2532,6 +2532,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
+       SND_PCI_QUIRK(0x1462, 0xcc34, "MSI Godlike X570", ALC1220_FIXUP_GB_DUAL_CODECS),
        SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
@@ -6396,6 +6397,7 @@ enum {
        ALC269_FIXUP_LEMOTE_A1802,
        ALC269_FIXUP_LEMOTE_A190X,
        ALC256_FIXUP_INTEL_NUC8_RUGGED,
+       ALC256_FIXUP_INTEL_NUC10,
        ALC255_FIXUP_XIAOMI_HEADSET_MIC,
        ALC274_FIXUP_HP_MIC,
        ALC274_FIXUP_HP_HEADSET_MIC,
@@ -6406,6 +6408,7 @@ enum {
        ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
        ALC282_FIXUP_ACER_DISABLE_LINEOUT,
        ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST,
+       ALC256_FIXUP_ACER_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -7782,6 +7785,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE
        },
+       [ALC256_FIXUP_INTEL_NUC10] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
        [ALC255_FIXUP_XIAOMI_HEADSET_MIC] = {
                .type = HDA_FIXUP_VERBS,
                .v.verbs = (const struct hda_verb[]) {
@@ -7853,6 +7865,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
        },
+       [ALC256_FIXUP_ACER_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */
+                       { 0x1a, 0x90a1092f }, /* use as internal mic */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7879,9 +7901,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK),
        SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
        SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
@@ -8128,6 +8152,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
        SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8222,6 +8247,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
        SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
        SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
+       SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
 
 #if 0
        /* Below is a quirk table taken from the old code.
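
Each board addition above is one row in a lookup table: SND_PCI_QUIRK(subsystem-vendor, subsystem-device, name, fixup-id) ties a machine, identified by its PCI subsystem IDs, to an entry in alc269_fixups[] or alc882_fixups[], so supporting a new machine is usually one table row plus at most one new fixup entry. Schematically, with a reduced table reusing an ID from this patch:

  /* The subsystem IDs come from the sound device's PCI config space
   * (the "Subsystem:" line in `lspci -nnv` output).
   */
  static const struct snd_pci_quirk table[] = {
          SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
          { }  /* terminator */
  };
  /* At codec setup, snd_hda_pick_fixup() walks such a table, matches the
   * codec's subsystem IDs, and applies the selected fixup's pin overrides
   * or verbs.
   */
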
index 3e8b6c0..8dfc165 100644
@@ -9,29 +9,34 @@ config SND_SOC_SOF_TOPLEVEL
 
 if SND_SOC_SOF_TOPLEVEL
 
+config SND_SOC_SOF_PCI_DEV
+       tristate
+
 config SND_SOC_SOF_PCI
        tristate "SOF PCI enumeration support"
        depends on PCI
-       select SND_SOC_SOF
-       select SND_SOC_ACPI if ACPI
        help
          This adds support for PCI enumeration. This option is
          required to enable Intel Skylake+ devices.
+         For backwards-compatibility with previous configurations the selection will
+         be used as default for platform-specific drivers.
          Say Y if you need this option.
          If unsure select "N".
 
 config SND_SOC_SOF_ACPI
        tristate "SOF ACPI enumeration support"
        depends on ACPI || COMPILE_TEST
-       select SND_SOC_SOF
-       select SND_SOC_ACPI if ACPI
-       select IOSF_MBI if X86 && PCI
        help
          This adds support for ACPI enumeration. This option is required
          to enable Intel Broadwell/Baytrail/Cherrytrail devices.
+         For backwards-compatibility with previous configurations the selection will
+         be used as default for platform-specific drivers.
          Say Y if you need this option.
          If unsure select "N".
 
+config SND_SOC_SOF_ACPI_DEV
+       tristate
+
 config SND_SOC_SOF_OF
        tristate "SOF OF enumeration support"
        depends on OF || COMPILE_TEST
index 05718df..606d813 100644
@@ -14,9 +14,9 @@ obj-$(CONFIG_SND_SOC_SOF) += snd-sof.o
 obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o
 
 
-obj-$(CONFIG_SND_SOC_SOF_ACPI) += snd-sof-acpi.o
+obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o
 obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o
-obj-$(CONFIG_SND_SOC_SOF_PCI) += snd-sof-pci.o
+obj-$(CONFIG_SND_SOC_SOF_PCI_DEV) += snd-sof-pci.o
 
 obj-$(CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL) += intel/
 obj-$(CONFIG_SND_SOC_SOF_IMX_TOPLEVEL) += imx/
index 4797a1c..da1c396 100644
@@ -9,31 +9,6 @@ config SND_SOC_SOF_INTEL_TOPLEVEL
 
 if SND_SOC_SOF_INTEL_TOPLEVEL
 
-config SND_SOC_SOF_INTEL_ACPI
-       def_tristate SND_SOC_SOF_ACPI
-       select SND_SOC_SOF_BAYTRAIL  if SND_SOC_SOF_BAYTRAIL_SUPPORT
-       select SND_SOC_SOF_BROADWELL if SND_SOC_SOF_BROADWELL_SUPPORT
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_INTEL_PCI
-       def_tristate SND_SOC_SOF_PCI
-       select SND_SOC_SOF_MERRIFIELD  if SND_SOC_SOF_MERRIFIELD_SUPPORT
-       select SND_SOC_SOF_APOLLOLAKE  if SND_SOC_SOF_APOLLOLAKE_SUPPORT
-       select SND_SOC_SOF_GEMINILAKE  if SND_SOC_SOF_GEMINILAKE_SUPPORT
-       select SND_SOC_SOF_CANNONLAKE  if SND_SOC_SOF_CANNONLAKE_SUPPORT
-       select SND_SOC_SOF_COFFEELAKE  if SND_SOC_SOF_COFFEELAKE_SUPPORT
-       select SND_SOC_SOF_ICELAKE     if SND_SOC_SOF_ICELAKE_SUPPORT
-       select SND_SOC_SOF_COMETLAKE   if SND_SOC_SOF_COMETLAKE_SUPPORT
-       select SND_SOC_SOF_TIGERLAKE   if SND_SOC_SOF_TIGERLAKE_SUPPORT
-       select SND_SOC_SOF_ELKHARTLAKE if SND_SOC_SOF_ELKHARTLAKE_SUPPORT
-       select SND_SOC_SOF_JASPERLAKE  if SND_SOC_SOF_JASPERLAKE_SUPPORT
-       select SND_SOC_SOF_ALDERLAKE   if SND_SOC_SOF_ALDERLAKE_SUPPORT
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
 config SND_SOC_SOF_INTEL_HIFI_EP_IPC
        tristate
        help
@@ -50,18 +25,25 @@ config SND_SOC_SOF_INTEL_ATOM_HIFI_EP
 
 config SND_SOC_SOF_INTEL_COMMON
        tristate
+       select SND_SOC_SOF
        select SND_SOC_ACPI_INTEL_MATCH
        select SND_SOC_SOF_XTENSA
        select SND_SOC_INTEL_MACH
        select SND_SOC_ACPI if ACPI
+       select SND_INTEL_DSP_CONFIG
        help
          This option is not user-selectable but automagically handled by
          'select' statements at a higher level.
 
-if SND_SOC_SOF_INTEL_ACPI
+if SND_SOC_SOF_ACPI
 
-config SND_SOC_SOF_BAYTRAIL_SUPPORT
-       bool "SOF support for Baytrail, Braswell and Cherrytrail"
+config SND_SOC_SOF_BAYTRAIL
+       tristate "SOF support for Baytrail, Braswell and Cherrytrail"
+       default SND_SOC_SOF_ACPI
+       select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
+       select SND_SOC_SOF_ACPI_DEV
+       select IOSF_MBI if X86 && PCI
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Baytrail, Braswell or Cherrytrail processors.
@@ -75,17 +57,12 @@ config SND_SOC_SOF_BAYTRAIL_SUPPORT
          Say Y if you want to enable SOF on Baytrail/Cherrytrail.
          If unsure select "N".
 
-config SND_SOC_SOF_BAYTRAIL
-       tristate
-       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
-       select SND_INTEL_DSP_CONFIG
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_BROADWELL_SUPPORT
-       bool "SOF support for Broadwell"
-       select SND_INTEL_DSP_CONFIG
+config SND_SOC_SOF_BROADWELL
+       tristate "SOF support for Broadwell"
+       default SND_SOC_SOF_ACPI
+       select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_INTEL_HIFI_EP_IPC
+       select SND_SOC_SOF_ACPI_DEV
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Broadwell processors.
@@ -100,197 +77,143 @@ config SND_SOC_SOF_BROADWELL_SUPPORT
          Say Y if you want to enable SOF on Broadwell.
          If unsure select "N".
 
-config SND_SOC_SOF_BROADWELL
-       tristate
-       select SND_SOC_SOF_INTEL_COMMON
-       select SND_SOC_SOF_INTEL_HIFI_EP_IPC
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-endif ## SND_SOC_SOF_INTEL_ACPI
+endif ## SND_SOC_SOF_ACPI
 
-if SND_SOC_SOF_INTEL_PCI
+if SND_SOC_SOF_PCI
 
-config SND_SOC_SOF_MERRIFIELD_SUPPORT
-       bool "SOF support for Tangier/Merrifield"
+config SND_SOC_SOF_MERRIFIELD
+       tristate "SOF support for Tangier/Merrifield"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Tangier/Merrifield processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_MERRIFIELD
+config SND_SOC_SOF_INTEL_APL
        tristate
-       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+       select SND_SOC_SOF_HDA_COMMON
 
-config SND_SOC_SOF_APOLLOLAKE_SUPPORT
-       bool "SOF support for Apollolake"
+config SND_SOC_SOF_APOLLOLAKE
+       tristate "SOF support for Apollolake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_APL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Apollolake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_APOLLOLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_GEMINILAKE_SUPPORT
-       bool "SOF support for GeminiLake"
+config SND_SOC_SOF_GEMINILAKE
+       tristate "SOF support for GeminiLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_APL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Geminilake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_GEMINILAKE
+config SND_SOC_SOF_INTEL_CNL
        tristate
        select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
 
-config SND_SOC_SOF_CANNONLAKE_SUPPORT
-       bool "SOF support for Cannonlake"
+config SND_SOC_SOF_CANNONLAKE
+       tristate "SOF support for Cannonlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Cannonlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_CANNONLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COFFEELAKE_SUPPORT
-       bool "SOF support for CoffeeLake"
+config SND_SOC_SOF_COFFEELAKE
+       tristate "SOF support for CoffeeLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Coffeelake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_COFFEELAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_ICELAKE_SUPPORT
-       bool "SOF support for Icelake"
+config SND_SOC_SOF_COMETLAKE
+       tristate "SOF support for CometLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Icelake processors.
-         Say Y if you have such a device.
+         using the Cometlake processors.
          If unsure select "N".
 
-config SND_SOC_SOF_ICELAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE
+config SND_SOC_SOF_INTEL_ICL
        tristate
        select SND_SOC_SOF_HDA_COMMON
        select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE_SUPPORT
-       bool
 
-config SND_SOC_SOF_COMETLAKE_LP_SUPPORT
-       bool "SOF support for CometLake"
-       select SND_SOC_SOF_COMETLAKE_SUPPORT
+config SND_SOC_SOF_ICELAKE
+       tristate "SOF support for Icelake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ICL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Cometlake processors.
+         using the Icelake processors.
+         Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_TIGERLAKE_SUPPORT
-       bool "SOF support for Tigerlake"
+config SND_SOC_SOF_JASPERLAKE
+       tristate "SOF support for JasperLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ICL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Tigerlake processors.
+         using the JasperLake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_TIGERLAKE
+config SND_SOC_SOF_INTEL_TGL
        tristate
        select SND_SOC_SOF_HDA_COMMON
        select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
 
-config SND_SOC_SOF_ELKHARTLAKE_SUPPORT
-       bool "SOF support for ElkhartLake"
+config SND_SOC_SOF_TIGERLAKE
+       tristate "SOF support for Tigerlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the ElkhartLake processors.
+         using the Tigerlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
 config SND_SOC_SOF_ELKHARTLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_JASPERLAKE_SUPPORT
-       bool "SOF support for JasperLake"
+       tristate "SOF support for ElkhartLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the JasperLake processors.
+         using the ElkhartLake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_JASPERLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_ALDERLAKE_SUPPORT
-       bool "SOF support for Alderlake"
+config SND_SOC_SOF_ALDERLAKE
+       tristate "SOF support for Alderlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Alderlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_ALDERLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level
-
 config SND_SOC_SOF_HDA_COMMON
        tristate
-       select SND_INTEL_DSP_CONFIG
        select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_PCI_DEV
+       select SND_INTEL_DSP_CONFIG
        select SND_SOC_SOF_HDA_LINK_BASELINE
        help
          This option is not user-selectable but automagically handled by
@@ -353,29 +276,22 @@ config SND_SOC_SOF_HDA
          This option is not user-selectable but automagically handled by
          'select' statements at a higher level.
 
-config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
-       bool "SOF support for SoundWire"
-       depends on ACPI
-       help
-         This adds support for SoundWire with Sound Open Firmware
-         for Intel(R) platforms.
-         Say Y if you want to enable SoundWire links with SOF.
-         If unsure select "N".
-
 config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
        tristate
-       select SND_SOC_SOF_INTEL_SOUNDWIRE if SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
 
 config SND_SOC_SOF_INTEL_SOUNDWIRE
-       tristate
-       select SOUNDWIRE
+       tristate "SOF support for SoundWire"
+       default SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+       depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+       depends on ACPI && SOUNDWIRE
+       depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y)
        select SOUNDWIRE_INTEL
+       select SND_INTEL_SOUNDWIRE_ACPI
        help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+         This adds support for SoundWire with Sound Open Firmware
+         for Intel(R) platforms.
+         Say Y if you want to enable SoundWire links with SOF.
+         If unsure select "N".
 
 endif ## SND_SOC_SOF_INTEL_PCI
 
index 2589111..f3d6f70 100644
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
 
-snd-sof-intel-byt-objs := byt.o
-snd-sof-intel-bdw-objs := bdw.o
+snd-sof-acpi-intel-byt-objs := byt.o
+snd-sof-acpi-intel-bdw-objs := bdw.o
 
 snd-sof-intel-ipc-objs := intel-ipc.o
 
@@ -13,8 +13,20 @@ snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-compress.o
 
 snd-sof-intel-hda-objs := hda-codec.o
 
-obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-intel-byt.o
-obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-intel-bdw.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-acpi-intel-byt.o
+obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o
 obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o
 obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o
 obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o
+
+snd-sof-pci-intel-tng-objs := pci-tng.o
+snd-sof-pci-intel-apl-objs := pci-apl.o
+snd-sof-pci-intel-cnl-objs := pci-cnl.o
+snd-sof-pci-intel-icl-objs := pci-icl.o
+snd-sof-pci-intel-tgl-objs := pci-tgl.o
+
+obj-$(CONFIG_SND_SOC_SOF_MERRIFIELD) += snd-sof-pci-intel-tng.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_APL) += snd-sof-pci-intel-apl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_CNL) += snd-sof-pci-intel-cnl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ICL) += snd-sof-pci-intel-icl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_TGL) += snd-sof-pci-intel-tgl.o
index 50a4a73..fd5ae62 100644
 #include <linux/module.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
 #include "../ops.h"
 #include "shim.h"
+#include "../sof-acpi-dev.h"
 #include "../sof-audio.h"
 
 /* BARs */
@@ -590,7 +594,7 @@ static struct snd_soc_dai_driver bdw_dai[] = {
 };
 
 /* broadwell ops */
-const struct snd_sof_dsp_ops sof_bdw_ops = {
+static const struct snd_sof_dsp_ops sof_bdw_ops = {
        /*Device init */
        .probe          = bdw_probe,
 
@@ -651,14 +655,69 @@ const struct snd_sof_dsp_ops sof_bdw_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_bdw_ops, SND_SOC_SOF_BROADWELL);
 
-const struct sof_intel_dsp_desc bdw_chip_info = {
+static const struct sof_intel_dsp_desc bdw_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(bdw_chip_info, SND_SOC_SOF_BROADWELL);
+
+static const struct sof_dev_desc sof_acpi_broadwell_desc = {
+       .machines = snd_soc_acpi_intel_broadwell_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = -1,
+       .irqindex_host_ipc = 0,
+       .chip_info = &bdw_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-bdw.ri",
+       .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
+       .ops = &sof_bdw_ops,
+};
+
+static const struct acpi_device_id sof_broadwell_match[] = {
+       { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_broadwell_match);
+
+static int sof_broadwell_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       const struct acpi_device_id *id;
+       const struct sof_dev_desc *desc;
+       int ret;
+
+       id = acpi_match_device(dev->driver->acpi_match_table, dev);
+       if (!id)
+               return -ENODEV;
+
+       ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       desc = device_get_match_data(dev);
+       if (!desc)
+               return -ENODEV;
+
+       return sof_acpi_probe(pdev, device_get_match_data(dev));
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_bdw_driver = {
+       .probe = sof_broadwell_probe,
+       .remove = sof_acpi_remove,
+       .driver = {
+               .name = "sof-audio-acpi-intel-bdw",
+               .pm = &sof_acpi_pm,
+               .acpi_match_table = sof_broadwell_match,
+       },
+};
+module_platform_driver(snd_sof_acpi_intel_bdw_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
index 19260db..2846fde 100644
 #include <linux/module.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
 #include "../ops.h"
 #include "shim.h"
+#include "../sof-acpi-dev.h"
 #include "../sof-audio.h"
 #include "../../intel/common/soc-intel-quirks.h"
 
@@ -822,7 +826,7 @@ irq:
 }
 
 /* baytrail ops */
-const struct snd_sof_dsp_ops sof_byt_ops = {
+static const struct snd_sof_dsp_ops sof_byt_ops = {
        /* device init */
        .probe          = byt_acpi_probe,
        .remove         = byt_remove,
@@ -892,16 +896,14 @@ const struct snd_sof_dsp_ops sof_byt_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_byt_ops, SND_SOC_SOF_BAYTRAIL);
 
-const struct sof_intel_dsp_desc byt_chip_info = {
+static const struct sof_intel_dsp_desc byt_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(byt_chip_info, SND_SOC_SOF_BAYTRAIL);
 
 /* cherrytrail and braswell ops */
-const struct snd_sof_dsp_ops sof_cht_ops = {
+static const struct snd_sof_dsp_ops sof_cht_ops = {
        /* device init */
        .probe          = byt_acpi_probe,
        .remove         = byt_remove,
@@ -972,16 +974,104 @@ const struct snd_sof_dsp_ops sof_cht_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_cht_ops, SND_SOC_SOF_BAYTRAIL);
 
-const struct sof_intel_dsp_desc cht_chip_info = {
+static const struct sof_intel_dsp_desc cht_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(cht_chip_info, SND_SOC_SOF_BAYTRAIL);
+
+/* BYTCR uses different IRQ index */
+static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
+       .machines = snd_soc_acpi_intel_baytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 0,
+       .chip_info = &byt_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+       .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_baytrail_desc = {
+       .machines = snd_soc_acpi_intel_baytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 5,
+       .chip_info = &byt_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+       .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
+       .machines = snd_soc_acpi_intel_cherrytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 5,
+       .chip_info = &cht_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cht.ri",
+       .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
+       .ops = &sof_cht_ops,
+};
+
+static const struct acpi_device_id sof_baytrail_match[] = {
+       { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
+       { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_baytrail_match);
+
+static int sof_baytrail_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       const struct sof_dev_desc *desc;
+       const struct acpi_device_id *id;
+       int ret;
+
+       id = acpi_match_device(dev->driver->acpi_match_table, dev);
+       if (!id)
+               return -ENODEV;
+
+       ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       desc = device_get_match_data(&pdev->dev);
+       if (!desc)
+               return -ENODEV;
+
+       if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
+               desc = &sof_acpi_baytrailcr_desc;
+
+       return sof_acpi_probe(pdev, desc);
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_byt_driver = {
+       .probe = sof_baytrail_probe,
+       .remove = sof_acpi_remove,
+       .driver = {
+               .name = "sof-audio-acpi-intel-byt",
+               .pm = &sof_acpi_pm,
+               .acpi_match_table = sof_baytrail_match,
+       },
+};
+module_platform_driver(snd_sof_acpi_intel_byt_driver);
 
 #endif /* CONFIG_SND_SOC_SOF_BAYTRAIL */
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
index 0dc3a8c..1d29b1f 100644
 #include <linux/module.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_intel.h>
+#include <sound/intel-dsp-config.h>
 #include <sound/intel-nhlt.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
 #include "../sof-audio.h"
+#include "../sof-pci-dev.h"
 #include "../ops.h"
 #include "hda.h"
 
@@ -1258,8 +1260,24 @@ void hda_machine_select(struct snd_sof_dev *sdev)
                dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n");
 }
 
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       int ret;
+
+       ret = snd_intel_dsp_driver_probe(pci);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       return sof_pci_probe(pci, pci_id);
+}
+EXPORT_SYMBOL_NS(hda_pci_intel_probe, SND_SOC_SOF_INTEL_HDA_COMMON);
+
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
 MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
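
hda_pci_intel_probe() is the SOF side of the snd-intel-dspcfg arbitration this series leans on: at probe, each candidate driver (legacy HDA, SST, SOF) asks snd_intel_dsp_driver_probe() which driver the platform quirks and user configuration select for the PCI device, and returns -ENODEV if the answer is somebody else. The per-platform PCI drivers added below all pass this function as their .probe. The mirrored check in a hypothetical legacy-side probe would look like:

  #include <linux/pci.h>
  #include <sound/intel-dsp-config.h>

  static int my_probe(struct pci_dev *pci, const struct pci_device_id *id)
  {
          int ret = snd_intel_dsp_driver_probe(pci);

          /* SND_INTEL_DSP_DRIVER_ANY means "no preference recorded" */
          if (ret != SND_INTEL_DSP_DRIVER_ANY &&
              ret != SND_INTEL_DSP_DRIVER_LEGACY)
                  return -ENODEV;  /* another driver owns this device */

          return 0;  /* ... continue with real probing ... */
  }
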
index d1c38c3..7c7579d 100644
@@ -764,4 +764,7 @@ void hda_machine_select(struct snd_sof_dev *sdev);
 void hda_set_mach_params(const struct snd_soc_acpi_mach *mach,
                         struct device *dev);
 
+/* PCI driver selection and probe */
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+
 #endif
diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c
new file mode 100644 (file)
index 0000000..f89e746
--- /dev/null
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc bxt_desc = {
+       .machines               = snd_soc_acpi_intel_bxt_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &apl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-apl.ri",
+       .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
+       .ops = &sof_apl_ops,
+};
+
+static const struct sof_dev_desc glk_desc = {
+       .machines               = snd_soc_acpi_intel_glk_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &apl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-glk.ri",
+       .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
+       .ops = &sof_apl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x5a98), /* BXT-P (ApolloLake) */
+               .driver_data = (unsigned long)&bxt_desc},
+       { PCI_DEVICE(0x8086, 0x1a98),/* BXT-T */
+               .driver_data = (unsigned long)&bxt_desc},
+       { PCI_DEVICE(0x8086, 0x3198), /* GeminiLake */
+               .driver_data = (unsigned long)&glk_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_apl_driver = {
+       .name = "sof-audio-pci-intel-apl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_apl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
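
pci-apl.c sets the template for every per-platform front-end created in this series (pci-cnl.c and pci-icl.c below, plus pci-tgl.c and pci-tng.c per the Makefile): nothing but sof_dev_desc tables naming firmware, topology and chip ops, and a PCI ID table whose driver_data points at the right descriptor, with all real logic in the shared snd-sof-pci core. On the other end the descriptor is recovered with a single cast, roughly:

  /* Simplified view of the hand-off inside the shared sof_pci_probe();
   * the real function does considerably more.
   */
  static int sof_pci_probe_sketch(struct pci_dev *pci,
                                  const struct pci_device_id *pci_id)
  {
          const struct sof_dev_desc *desc =
                  (const struct sof_dev_desc *)pci_id->driver_data;

          dev_dbg(&pci->dev, "firmware %s, topology path %s\n",
                  desc->default_fw_filename, desc->default_tplg_path);
          /* ... allocate the snd_sof_dev, wire up desc->ops, load fw ... */
          return 0;
  }
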
diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c
new file mode 100644
index 0000000..f23257a
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc cnl_desc = {
+       .machines               = snd_soc_acpi_intel_cnl_machines,
+       .alt_machines           = snd_soc_acpi_intel_cnl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cnl.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cfl_desc = {
+       .machines               = snd_soc_acpi_intel_cfl_machines,
+       .alt_machines           = snd_soc_acpi_intel_cfl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cfl.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cml_desc = {
+       .machines               = snd_soc_acpi_intel_cml_machines,
+       .alt_machines           = snd_soc_acpi_intel_cml_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cml.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x9dc8), /* CNL-LP */
+               .driver_data = (unsigned long)&cnl_desc},
+       { PCI_DEVICE(0x8086, 0xa348), /* CNL-H */
+               .driver_data = (unsigned long)&cfl_desc},
+       { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
+               .driver_data = (unsigned long)&cml_desc},
+       { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
+               .driver_data = (unsigned long)&cml_desc},
+       { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
+               .driver_data = (unsigned long)&cml_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_cnl_driver = {
+       .name = "sof-audio-pci-intel-cnl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_cnl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c
new file mode 100644
index 0000000..2f60c28
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc icl_desc = {
+       .machines               = snd_soc_acpi_intel_icl_machines,
+       .alt_machines           = snd_soc_acpi_intel_icl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &icl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-icl.ri",
+       .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
+       .ops = &sof_icl_ops,
+};
+
+static const struct sof_dev_desc jsl_desc = {
+       .machines               = snd_soc_acpi_intel_jsl_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &jsl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-jsl.ri",
+       .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
+               .driver_data = (unsigned long)&icl_desc},
+       { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
+               .driver_data = (unsigned long)&icl_desc},
+       { PCI_DEVICE(0x8086, 0x38c8), /* ICL-N */
+               .driver_data = (unsigned long)&jsl_desc},
+       { PCI_DEVICE(0x8086, 0x4dc8), /* JSL-N */
+               .driver_data = (unsigned long)&jsl_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_icl_driver = {
+       .name = "sof-audio-pci-intel-icl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_icl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
new file mode 100644
index 0000000..4856074
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc tgl_desc = {
+       .machines               = snd_soc_acpi_intel_tgl_machines,
+       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tgl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-tgl.ri",
+       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc tglh_desc = {
+       .machines               = snd_soc_acpi_intel_tgl_machines,
+       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tglh_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-tgl-h.ri",
+       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc ehl_desc = {
+       .machines               = snd_soc_acpi_intel_ehl_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &ehl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-ehl.ri",
+       .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc adls_desc = {
+       .machines               = snd_soc_acpi_intel_adl_machines,
+       .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &adls_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-adl-s.ri",
+       .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
+               .driver_data = (unsigned long)&tgl_desc},
+       { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
+               .driver_data = (unsigned long)&tglh_desc},
+       { PCI_DEVICE(0x8086, 0x4b55), /* EHL */
+               .driver_data = (unsigned long)&ehl_desc},
+       { PCI_DEVICE(0x8086, 0x4b58), /* EHL */
+               .driver_data = (unsigned long)&ehl_desc},
+       { PCI_DEVICE(0x8086, 0x7ad0), /* ADL-S */
+               .driver_data = (unsigned long)&adls_desc},
+       { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */
+               .driver_data = (unsigned long)&tgl_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tgl_driver = {
+       .name = "sof-audio-pci-intel-tgl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_tgl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
new file mode 100644
index 0000000..94b9704
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "shim.h"
+
+static struct snd_soc_acpi_mach sof_tng_machines[] = {
+       {
+               .id = "INT343A",
+               .drv_name = "edison",
+               .sof_fw_filename = "sof-byt.ri",
+               .sof_tplg_filename = "sof-byt.tplg",
+       },
+       {}
+};
+
+static const struct sof_dev_desc tng_desc = {
+       .machines               = sof_tng_machines,
+       .resindex_lpe_base      = 3,    /* IRAM, but subtract IRAM offset */
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = 0,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tng_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt.tplg",
+       .ops = &sof_tng_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x119a),
+               .driver_data = (unsigned long)&tng_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tng_driver = {
+       .name = "sof-audio-pci-intel-tng",
+       .id_table = sof_pci_ids,
+       .probe = sof_pci_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_tng_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
index 1e0afb5..529f68d 100644
@@ -167,13 +167,7 @@ struct sof_intel_dsp_desc {
 };
 
 extern const struct snd_sof_dsp_ops sof_tng_ops;
-extern const struct snd_sof_dsp_ops sof_byt_ops;
-extern const struct snd_sof_dsp_ops sof_cht_ops;
-extern const struct snd_sof_dsp_ops sof_bdw_ops;
 
-extern const struct sof_intel_dsp_desc byt_chip_info;
-extern const struct sof_intel_dsp_desc cht_chip_info;
-extern const struct sof_intel_dsp_desc bdw_chip_info;
 extern const struct sof_intel_dsp_desc tng_chip_info;
 
 struct sof_intel_stream {
index cc2e257..1fec042 100644
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
 #include <sound/soc-acpi.h>
 #include <sound/soc-acpi-intel-match.h>
 #include <sound/sof.h>
 #include "../intel/common/soc-intel-quirks.h"
 #include "ops.h"
+#include "sof-acpi-dev.h"
 
 /* platform specific devices */
 #include "intel/shim.h"
@@ -36,74 +36,12 @@ MODULE_PARM_DESC(sof_acpi_debug, "SOF ACPI debug options (0x0 all off)");
 
 #define SOF_ACPI_DISABLE_PM_RUNTIME BIT(0)
 
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
-static const struct sof_dev_desc sof_acpi_broadwell_desc = {
-       .machines = snd_soc_acpi_intel_broadwell_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = -1,
-       .irqindex_host_ipc = 0,
-       .chip_info = &bdw_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-bdw.ri",
-       .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
-       .ops = &sof_bdw_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-
-/* BYTCR uses different IRQ index */
-static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
-       .machines = snd_soc_acpi_intel_baytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 0,
-       .chip_info = &byt_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
-       .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_baytrail_desc = {
-       .machines = snd_soc_acpi_intel_baytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 5,
-       .chip_info = &byt_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
-       .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
-       .machines = snd_soc_acpi_intel_cherrytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 5,
-       .chip_info = &cht_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cht.ri",
-       .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
-       .ops = &sof_cht_ops,
-};
-
-#endif
-
-static const struct dev_pm_ops sof_acpi_pm = {
+const struct dev_pm_ops sof_acpi_pm = {
        SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
        SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
                           snd_sof_runtime_idle)
 };
+EXPORT_SYMBOL_NS(sof_acpi_pm, SND_SOC_SOF_ACPI_DEV);
 
 static void sof_acpi_probe_complete(struct device *dev)
 {
@@ -118,41 +56,19 @@ static void sof_acpi_probe_complete(struct device *dev)
        pm_runtime_enable(dev);
 }
 
-static int sof_acpi_probe(struct platform_device *pdev)
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc)
 {
        struct device *dev = &pdev->dev;
-       const struct acpi_device_id *id;
-       const struct sof_dev_desc *desc;
        struct snd_sof_pdata *sof_pdata;
        const struct snd_sof_dsp_ops *ops;
        int ret;
 
-       id = acpi_match_device(dev->driver->acpi_match_table, dev);
-       if (!id)
-               return -ENODEV;
-
-       if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
-               ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
-               if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
-                       dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
-                       return -ENODEV;
-               }
-       }
        dev_dbg(dev, "ACPI DSP detected");
 
        sof_pdata = devm_kzalloc(dev, sizeof(*sof_pdata), GFP_KERNEL);
        if (!sof_pdata)
                return -ENOMEM;
 
-       desc = device_get_match_data(dev);
-       if (!desc)
-               return -ENODEV;
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-       if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
-               desc = &sof_acpi_baytrailcr_desc;
-#endif
-
        /* get ops for platform */
        ops = desc->ops;
        if (!ops) {
@@ -194,44 +110,20 @@ static int sof_acpi_probe(struct platform_device *pdev)
 
        return ret;
 }
+EXPORT_SYMBOL_NS(sof_acpi_probe, SND_SOC_SOF_ACPI_DEV);
 
-static int sof_acpi_remove(struct platform_device *pdev)
+int sof_acpi_remove(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
+
        if (!(sof_acpi_debug & SOF_ACPI_DISABLE_PM_RUNTIME))
-               pm_runtime_disable(&pdev->dev);
+               pm_runtime_disable(dev);
 
        /* call sof helper for DSP hardware remove */
-       snd_sof_device_remove(&pdev->dev);
+       snd_sof_device_remove(dev);
 
        return 0;
 }
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id sof_acpi_match[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
-       { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-       { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
-       { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
-#endif
-       { }
-};
-MODULE_DEVICE_TABLE(acpi, sof_acpi_match);
-#endif
-
-/* acpi_driver definition */
-static struct platform_driver snd_sof_acpi_driver = {
-       .probe = sof_acpi_probe,
-       .remove = sof_acpi_remove,
-       .driver = {
-               .name = "sof-audio-acpi",
-               .pm = &sof_acpi_pm,
-               .acpi_match_table = ACPI_PTR(sof_acpi_match),
-       },
-};
-module_platform_driver(snd_sof_acpi_driver);
+EXPORT_SYMBOL_NS(sof_acpi_remove, SND_SOC_SOF_ACPI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_BAYTRAIL);
-MODULE_IMPORT_NS(SND_SOC_SOF_BROADWELL);
diff --git a/sound/soc/sof/sof-acpi-dev.h b/sound/soc/sof/sof-acpi-dev.h
new file mode 100644
index 0000000..5c2b558
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_ACPI_H
+#define __SOUND_SOC_SOF_ACPI_H
+
+extern const struct dev_pm_ops sof_acpi_pm;
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc);
+int sof_acpi_remove(struct platform_device *pdev);
+
+#endif
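
With the descriptor now passed in by the caller, each platform ACPI driver
is expected to own its match table and resolve the sof_dev_desc itself
before calling sof_acpi_probe(). A sketch of such a consumer, modeled on
the platform_driver removed from sof-acpi-dev.c above; the driver and match
table names here are illustrative, not taken from this series:

    static int sof_example_acpi_probe(struct platform_device *pdev)
    {
            const struct sof_dev_desc *desc = device_get_match_data(&pdev->dev);

            if (!desc)
                    return -ENODEV;

            return sof_acpi_probe(pdev, desc);
    }

    static struct platform_driver snd_sof_acpi_example_driver = {
            .probe = sof_example_acpi_probe,
            .remove = sof_acpi_remove,
            .driver = {
                    .name = "sof-audio-acpi-example",
                    .pm = &sof_acpi_pm,
                    .acpi_match_table = ACPI_PTR(sof_example_match),
            },
    };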
index fd1f0d8..b842a41 100644
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
 #include <sound/soc-acpi.h>
 #include <sound/soc-acpi-intel-match.h>
 #include <sound/sof.h>
 #include "ops.h"
-
-/* platform specific devices */
-#include "intel/shim.h"
-#include "intel/hda.h"
+#include "sof-pci-dev.h"
 
 static char *fw_path;
 module_param(fw_path, charp, 0444);
@@ -81,243 +77,14 @@ static const struct dmi_system_id community_key_platforms[] = {
        {},
 };
 
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
-static const struct sof_dev_desc bxt_desc = {
-       .machines               = snd_soc_acpi_intel_bxt_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &apl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-apl.ri",
-       .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
-       .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
-static const struct sof_dev_desc glk_desc = {
-       .machines               = snd_soc_acpi_intel_glk_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &apl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-glk.ri",
-       .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
-       .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
-static struct snd_soc_acpi_mach sof_tng_machines[] = {
-       {
-               .id = "INT343A",
-               .drv_name = "edison",
-               .sof_fw_filename = "sof-byt.ri",
-               .sof_tplg_filename = "sof-byt.tplg",
-       },
-       {}
-};
-
-static const struct sof_dev_desc tng_desc = {
-       .machines               = sof_tng_machines,
-       .resindex_lpe_base      = 3,    /* IRAM, but subtract IRAM offset */
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = 0,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tng_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt.tplg",
-       .ops = &sof_tng_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
-static const struct sof_dev_desc cnl_desc = {
-       .machines               = snd_soc_acpi_intel_cnl_machines,
-       .alt_machines           = snd_soc_acpi_intel_cnl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cnl.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
-static const struct sof_dev_desc cfl_desc = {
-       .machines               = snd_soc_acpi_intel_cfl_machines,
-       .alt_machines           = snd_soc_acpi_intel_cfl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cfl.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
-static const struct sof_dev_desc cml_desc = {
-       .machines               = snd_soc_acpi_intel_cml_machines,
-       .alt_machines           = snd_soc_acpi_intel_cml_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cml.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
-static const struct sof_dev_desc icl_desc = {
-       .machines               = snd_soc_acpi_intel_icl_machines,
-       .alt_machines           = snd_soc_acpi_intel_icl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &icl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-icl.ri",
-       .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
-       .ops = &sof_icl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) || IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc tgl_desc = {
-       .machines               = snd_soc_acpi_intel_tgl_machines,
-       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tgl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-tgl.ri",
-       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
-static const struct sof_dev_desc tglh_desc = {
-       .machines               = snd_soc_acpi_intel_tgl_machines,
-       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tglh_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-tgl-h.ri",
-       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
-static const struct sof_dev_desc ehl_desc = {
-       .machines               = snd_soc_acpi_intel_ehl_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &ehl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-ehl.ri",
-       .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
-static const struct sof_dev_desc jsl_desc = {
-       .machines               = snd_soc_acpi_intel_jsl_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &jsl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-jsl.ri",
-       .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc adls_desc = {
-       .machines               = snd_soc_acpi_intel_adl_machines,
-       .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &adls_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-adl-s.ri",
-       .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-static const struct dev_pm_ops sof_pci_pm = {
+const struct dev_pm_ops sof_pci_pm = {
        .prepare = snd_sof_prepare,
        .complete = snd_sof_complete,
        SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
        SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
                           snd_sof_runtime_idle)
 };
+EXPORT_SYMBOL_NS(sof_pci_pm, SND_SOC_SOF_PCI_DEV);
 
 static void sof_pci_probe_complete(struct device *dev)
 {
@@ -343,8 +110,7 @@ static void sof_pci_probe_complete(struct device *dev)
        pm_runtime_put_noidle(dev);
 }
 
-static int sof_pci_probe(struct pci_dev *pci,
-                        const struct pci_device_id *pci_id)
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        struct device *dev = &pci->dev;
        const struct sof_dev_desc *desc =
@@ -353,13 +119,6 @@ static int sof_pci_probe(struct pci_dev *pci,
        const struct snd_sof_dsp_ops *ops;
        int ret;
 
-       if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
-               ret = snd_intel_dsp_driver_probe(pci);
-               if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
-                       dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
-                       return -ENODEV;
-               }
-       }
        dev_dbg(&pci->dev, "PCI DSP detected");
 
        /* get ops for platform */
@@ -447,8 +206,9 @@ release_regions:
 
        return ret;
 }
+EXPORT_SYMBOL_NS(sof_pci_probe, SND_SOC_SOF_PCI_DEV);
 
-static void sof_pci_remove(struct pci_dev *pci)
+void sof_pci_remove(struct pci_dev *pci)
 {
        /* call sof helper for DSP hardware remove */
        snd_sof_device_remove(&pci->dev);
@@ -461,94 +221,12 @@ static void sof_pci_remove(struct pci_dev *pci)
        /* release pci regions and disable device */
        pci_release_regions(pci);
 }
+EXPORT_SYMBOL_NS(sof_pci_remove, SND_SOC_SOF_PCI_DEV);
 
-static void sof_pci_shutdown(struct pci_dev *pci)
+void sof_pci_shutdown(struct pci_dev *pci)
 {
        snd_sof_device_shutdown(&pci->dev);
 }
-
-/* PCI IDs */
-static const struct pci_device_id sof_pci_ids[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
-       { PCI_DEVICE(0x8086, 0x119a),
-               .driver_data = (unsigned long)&tng_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
-       /* BXT-P & Apollolake */
-       { PCI_DEVICE(0x8086, 0x5a98),
-               .driver_data = (unsigned long)&bxt_desc},
-       { PCI_DEVICE(0x8086, 0x1a98),
-               .driver_data = (unsigned long)&bxt_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
-       { PCI_DEVICE(0x8086, 0x3198),
-               .driver_data = (unsigned long)&glk_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
-       { PCI_DEVICE(0x8086, 0x9dc8),
-               .driver_data = (unsigned long)&cnl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
-       { PCI_DEVICE(0x8086, 0xa348),
-               .driver_data = (unsigned long)&cfl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
-       { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
-               .driver_data = (unsigned long)&icl_desc},
-       { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
-               .driver_data = (unsigned long)&icl_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
-       { PCI_DEVICE(0x8086, 0x38c8),
-               .driver_data = (unsigned long)&jsl_desc},
-       { PCI_DEVICE(0x8086, 0x4dc8),
-               .driver_data = (unsigned long)&jsl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
-       { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
-               .driver_data = (unsigned long)&cml_desc},
-       { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
-               .driver_data = (unsigned long)&cml_desc},
-       { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
-               .driver_data = (unsigned long)&cml_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
-       { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
-               .driver_data = (unsigned long)&tgl_desc},
-       { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
-               .driver_data = (unsigned long)&tglh_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
-       { PCI_DEVICE(0x8086, 0x4b55),
-               .driver_data = (unsigned long)&ehl_desc},
-       { PCI_DEVICE(0x8086, 0x4b58),
-               .driver_data = (unsigned long)&ehl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-       { PCI_DEVICE(0x8086, 0x7ad0),
-               .driver_data = (unsigned long)&adls_desc},
-       { PCI_DEVICE(0x8086, 0x51c8),
-               .driver_data = (unsigned long)&tgl_desc},
-#endif
-       { 0, }
-};
-MODULE_DEVICE_TABLE(pci, sof_pci_ids);
-
-/* pci_driver definition */
-static struct pci_driver snd_sof_pci_driver = {
-       .name = "sof-audio-pci",
-       .id_table = sof_pci_ids,
-       .probe = sof_pci_probe,
-       .remove = sof_pci_remove,
-       .shutdown = sof_pci_shutdown,
-       .driver = {
-               .pm = &sof_pci_pm,
-       },
-};
-module_pci_driver(snd_sof_pci_driver);
+EXPORT_SYMBOL_NS(sof_pci_shutdown, SND_SOC_SOF_PCI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
-MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
diff --git a/sound/soc/sof/sof-pci-dev.h b/sound/soc/sof/sof-pci-dev.h
new file mode 100644
index 0000000..81155a5
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_PCI_H
+#define __SOUND_SOC_SOF_PCI_H
+
+extern const struct dev_pm_ops sof_pci_pm;
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+void sof_pci_remove(struct pci_dev *pci);
+void sof_pci_shutdown(struct pci_dev *pci);
+
+#endif
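
Both new headers pair with the EXPORT_SYMBOL_NS() calls in the C files: the
helpers are exported into the SND_SOC_SOF_ACPI_DEV and SND_SOC_SOF_PCI_DEV
symbol namespaces, so a platform driver can only link against them after
opting in explicitly; without the import, modpost complains and the module
is refused at load time. The pairing looks like this:

    /* in the exporting module (sof-pci-dev.c) */
    EXPORT_SYMBOL_NS(sof_pci_probe, SND_SOC_SOF_PCI_DEV);

    /* in each consumer (e.g. pci-apl.c); omitting it yields an error of
     * the form "module ... uses symbol sof_pci_probe from namespace
     * SND_SOC_SOF_PCI_DEV, but does not import it"
     */
    MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);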
index 8243652..a746802 100644
@@ -652,10 +652,10 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip,
                cur_rate = prev_rate;
 
        if (cur_rate != rate) {
-               usb_audio_warn(chip,
-                              "%d:%d: freq mismatch (RO clock): req %d, clock runs @%d\n",
-                              fmt->iface, fmt->altsetting, rate, cur_rate);
-               return -ENXIO;
+               usb_audio_dbg(chip,
+                             "%d:%d: freq mismatch: req %d, clock runs @%d\n",
+                             fmt->iface, fmt->altsetting, rate, cur_rate);
+               /* continue processing */
        }
 
 validation:
index b1c78db..b004b2e 100644
@@ -1307,6 +1307,17 @@ no_res_check:
                        /* totally crap, return an error */
                        return -EINVAL;
                }
+       } else {
+               /* if the max volume is too low, it's likely a bogus range;
+                * here we use -96dB as the threshold
+                */
+               if (cval->dBmax <= -9600) {
+                       usb_audio_info(cval->head.mixer->chip,
+                                      "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n",
+                                      cval->head.id, mixer_ctrl_intf(cval->head.mixer),
+                                      cval->dBmin, cval->dBmax);
+                       cval->dBmin = cval->dBmax = 0;
+               }
        }
 
        return 0;
index a7212f1..646deb6 100644
@@ -536,6 +536,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .id = USB_ID(0x05a7, 0x1020),
                .map = bose_companion5_map,
        },
+       {
+               /* Corsair Virtuoso SE (wired mode) */
+               .id = USB_ID(0x1b1c, 0x0a3d),
+               .map = corsair_virtuoso_map,
+       },
+       {
+               /* Corsair Virtuoso SE (wireless mode) */
+               .id = USB_ID(0x1b1c, 0x0a3e),
+               .map = corsair_virtuoso_map,
+       },
        {
                /* Corsair Virtuoso (wired mode) */
                .id = USB_ID(0x1b1c, 0x0a41),
index bf5a0f3..e5311b6 100644
@@ -845,13 +845,19 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs)
 
        list_for_each_entry(fp, &subs->fmt_list, list) {
                ep = snd_usb_get_endpoint(chip, fp->endpoint);
-               if (ep && ep->cur_rate)
-                       return ep;
+               if (ep && ep->cur_audiofmt) {
+                       /* if the EP is opened solely by this substream, it is
+                        * still free to change the parameters; otherwise this
+                        * substream has to follow the EP's existing parameters
+                        */
+                       if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1)
+                               return ep;
+               }
                if (!fp->implicit_fb)
                        continue;
                /* for the implicit fb, check the sync ep as well */
                ep = snd_usb_get_endpoint(chip, fp->sync_ep);
-               if (ep && ep->cur_rate)
+               if (ep && ep->cur_audiofmt)
                        return ep;
        }
        return NULL;
index 9ba4682..737b272 100644
@@ -1482,7 +1482,7 @@ static int pioneer_djm_set_format_quirk(struct snd_usb_substream *subs,
        usb_set_interface(subs->dev, 0, 1);
        // we should derive windex from fmt-sync_ep but it's not set
        snd_usb_ctl_msg(subs->stream->chip->dev,
-               usb_rcvctrlpipe(subs->stream->chip->dev, 0),
+               usb_sndctrlpipe(subs->stream->chip->dev, 0),
                0x01, 0x22, 0x0100, windex, &sr, 0x0003);
        return 0;
 }
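
The request being sent here (bRequestType 0x22) has the USB_DIR_IN bit
clear, i.e. it is a host-to-device transfer, so it must go through the send
control pipe; pairing an OUT request with usb_rcvctrlpipe() trips the USB
core's pipe-direction check. A general sketch of deriving the pipe from the
request type (the helper name is illustrative):

    #include <linux/usb.h>

    static unsigned int ctl_pipe_for(struct usb_device *dev, __u8 requesttype)
    {
            /* bit 7 of bRequestType is the direction: set means IN
             * (device to host), clear means OUT (host to device)
             */
            return (requesttype & USB_DIR_IN) ? usb_rcvctrlpipe(dev, 0)
                                              : usb_sndctrlpipe(dev, 0);
    }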
index d787cb8..e5fbf16 100644
@@ -21,6 +21,8 @@
 #define KVM_UTIL_PGS_PER_HUGEPG 512
 #define KVM_UTIL_MIN_PFN       2
 
+static int vcpu_mmap_sz(void);
+
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
 {
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
                vcpu->dirty_gfns = NULL;
        }
 
-       ret = munmap(vcpu->state, sizeof(*vcpu->state));
+       ret = munmap(vcpu->state, vcpu_mmap_sz());
        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
        close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
        TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
                "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
                vcpu_mmap_sz(), sizeof(*vcpu->state));
-       vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+       vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
                PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
        TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
                "vcpu id: %u errno: %i", vcpuid, errno);
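
Both hunks size the vCPU mapping with vcpu_mmap_sz() instead of
sizeof(*vcpu->state): KVM's per-vCPU mmap region is whatever
KVM_GET_VCPU_MMAP_SIZE reports, and it can be larger than struct kvm_run
(it may include extra pages such as the coalesced MMIO ring), so munmap()
must use the same length as the original mmap(). The forward declaration
added at the top is needed because vm_vcpu_rm() sits above the helper's
definition in this file, which looks roughly like:

    static int vcpu_mmap_sz(void)
    {
            int dev_fd, ret;

            dev_fd = open(KVM_DEV_PATH, O_RDONLY);
            TEST_ASSERT(dev_fd >= 0, "open of %s failed, errno: %i",
                        KVM_DEV_PATH, errno);

            /* the kernel reports how large each vCPU's mmap region is */
            ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
            TEST_ASSERT(ret >= sizeof(struct kvm_run),
                        "KVM_GET_VCPU_MMAP_SIZE failed, rc: %i errno: %i",
                        ret, errno);

            close(dev_fd);
            return ret;
    }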
index 9246ea3..804ff5f 100644
 
 #include <stdint.h>
 #include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
 
 #define VCPU_ID                5
 
+#define SHINFO_REGION_GVA      0xc0000000ULL
 #define SHINFO_REGION_GPA      0xc0000000ULL
 #define SHINFO_REGION_SLOT     10
 #define PAGE_SIZE              4096
 
 #define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
 
 static struct kvm_vm *vm;
 
 #define XEN_HYPERCALL_MSR      0x40000000
 
+#define MIN_STEAL_TIME         50000
+
 struct pvclock_vcpu_time_info {
         u32   version;
         u32   pad0;
@@ -43,11 +51,67 @@ struct pvclock_wall_clock {
         u32   nsec;
 } __attribute__((__packed__));
 
+struct vcpu_runstate_info {
+    uint32_t state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+};
+
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+
 static void guest_code(void)
 {
+       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+       /* Test having the host set runstates manually */
+       GUEST_SYNC(RUNSTATE_runnable);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_blocked);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_offline);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       /* Test runstate time adjust */
+       GUEST_SYNC(4);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+       /* Test runstate time set */
+       GUEST_SYNC(5);
+       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+       /* sched_yield() should result in some 'runnable' time */
+       GUEST_SYNC(6);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
        GUEST_DONE();
 }
 
+/* Return this thread's accumulated run delay (time spent runnable but
+ * not actually running, e.g. steal time), i.e. the second field of
+ * /proc/<tid>/schedstat, in nanoseconds.
+ */
+static long get_run_delay(void)
+{
+       char path[64];
+       long val[2];
+       FILE *fp;
+
+       sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+       fp = fopen(path, "r");
+       fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+       fclose(fp);
+
+       return val[1];
+}
+
 static int cmp_timespec(struct timespec *a, struct timespec *b)
 {
        if (a->tv_sec > b->tv_sec)
@@ -66,12 +130,14 @@ int main(int argc, char *argv[])
 {
        struct timespec min_ts, max_ts, vm_ts;
 
-       if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
-             KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
                print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
                exit(KSFT_SKIP);
        }
 
+       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
        clock_gettime(CLOCK_REALTIME, &min_ts);
 
        vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
@@ -80,6 +146,7 @@ int main(int argc, char *argv[])
        /* Map a region for the shared_info page */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
                                    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
 
        struct kvm_xen_hvm_config hvmc = {
                .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -111,6 +178,17 @@ int main(int argc, char *argv[])
        };
        vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
 
+       if (do_runstate_tests) {
+               struct kvm_xen_vcpu_attr st = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                       .u.gpa = RUNSTATE_ADDR,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+       }
+
+       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+       rs->state = 0x5a;
+
        for (;;) {
                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
                struct ucall uc;
@@ -126,8 +204,56 @@ int main(int argc, char *argv[])
                case UCALL_ABORT:
                        TEST_FAIL("%s", (const char *)uc.args[0]);
                        /* NOT REACHED */
-               case UCALL_SYNC:
+               case UCALL_SYNC: {
+                       struct kvm_xen_vcpu_attr rst;
+                       long rundelay;
+
+                       /* If no runstate support, bail out early */
+                       if (!do_runstate_tests)
+                               goto done;
+
+                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                   rs->time[1] + rs->time[2] + rs->time[3],
+                                   "runstate times don't add up");
+
+                       switch (uc.args[1]) {
+                       case RUNSTATE_running...RUNSTATE_offline:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+                               rst.u.runstate.state = uc.args[1];
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 4:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = (uint64_t)-1;
+                               rst.u.runstate.time_blocked =
+                                       0x5a - rs->time[RUNSTATE_blocked];
+                               rst.u.runstate.time_offline =
+                                       0x6b6b - rs->time[RUNSTATE_offline];
+                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+                                       rst.u.runstate.time_offline;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case 5:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = RUNSTATE_running;
+                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+                               rst.u.runstate.time_blocked = 0x6b6b;
+                               rst.u.runstate.time_offline = 0x5a;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 6:
+                               /* Yield until scheduler delay exceeds target */
+                               rundelay = get_run_delay() + MIN_STEAL_TIME;
+                               do {
+                                       sched_yield();
+                               } while (get_run_delay() < rundelay);
+                               break;
+                       }
                        break;
+               }
                case UCALL_DONE:
                        goto done;
                default:
@@ -162,6 +288,33 @@ int main(int argc, char *argv[])
        TEST_ASSERT(ti2->version && !(ti2->version & 1),
                    "Bad time_info version %x", ti->version);
 
+       if (do_runstate_tests) {
+               /*
+                * Fetch runstate and check sanity. Strictly speaking, the
+                * numbers would not have to match in the general case, but
+                * here we know the vCPU is no longer running, so they must.
+                */
+               struct kvm_xen_vcpu_attr rst = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+               TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+               TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+                           "State entry time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                           "Running time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                           "Runnable time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                           "Blocked time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                           "Offline time mismatch");
+
+               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                           rs->time[1] + rs->time[2] + rs->time[3],
+                           "runstate times don't add up");
+       }
        kvm_vm_free(vm);
        return 0;
 }