Merge tag 'for-5.15/parisc-3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Sep 2021 18:52:01 +0000 (11:52 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Sep 2021 18:52:01 +0000 (11:52 -0700)
Pull parisc fixes from Helge Deller:

 - Build warning fixes in Makefile and Dino PCI driver

 - Fix when sched_clock is marked unstable

 - Drop strnlen_user() in favour of generic version

 - Prevent the kernel from writing outside the userspace signal stack

 - Remove CONFIG_SET_FS including KERNEL_DS and USER_DS from parisc and
   switch to __get/put_kernel_nofault()

* tag 'for-5.15/parisc-3' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Implement __get/put_kernel_nofault()
  parisc: Mark sched_clock unstable only if clocks are not syncronized
  parisc: Move pci_dev_is_behind_card_dino to where it is used
  parisc: Reduce sigreturn trampoline to 3 instructions
  parisc: Check user signal stack trampoline is inside TASK_SIZE
  parisc: Drop useless debug info and comments from signal.c
  parisc: Drop strnlen_user() in favour of generic version
  parisc: Add missing FORCE prerequisite in Makefile

290 files changed:
Documentation/ABI/stable/sysfs-driver-dma-idxd
Documentation/ABI/testing/debugfs-driver-habanalabs
Documentation/admin-guide/bootconfig.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/devicetree/bindings/dma/altr,msgdma.yaml
Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/dma/st,stm32-dma.yaml
Documentation/devicetree/bindings/sound/fsl,rpmsg.yaml
Documentation/devicetree/bindings/sound/mt8195-afe-pcm.yaml
Documentation/driver-api/cxl/memory-devices.rst
Documentation/gpu/drm-mm.rst
MAINTAINERS
arch/arm/Kconfig
arch/arm/Makefile
arch/arm/boot/Makefile
arch/arm/boot/compressed/Makefile
arch/arm/include/asm/div64.h
arch/arm/include/asm/gpio.h
arch/arm/include/asm/ptrace.h
arch/arm/include/asm/syscall.h
arch/arm/include/asm/thread_info.h
arch/arm/include/asm/uaccess-asm.h
arch/arm/include/asm/uaccess.h
arch/arm/include/asm/unified.h
arch/arm/include/uapi/asm/unistd.h
arch/arm/kernel/asm-offsets.c
arch/arm/kernel/entry-common.S
arch/arm/kernel/process.c
arch/arm/kernel/ptrace.c
arch/arm/kernel/signal.c
arch/arm/kernel/sys_oabi-compat.c
arch/arm/kernel/traps.c
arch/arm/lib/copy_from_user.S
arch/arm/lib/copy_to_user.S
arch/arm/tools/syscall.tbl
arch/s390/Kconfig
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/include/asm/cpu_mcf.h
arch/s390/include/asm/smp.h
arch/s390/include/asm/stacktrace.h
arch/s390/include/asm/unwind.h
arch/s390/kernel/entry.S
arch/s390/kernel/ftrace.c
arch/s390/kernel/perf_cpum_cf.c
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/kernel/topology.c
arch/s390/mm/gmap.c
arch/s390/mm/pgtable.c
arch/s390/pci/pci_clp.c
arch/um/Kconfig
arch/um/drivers/virt-pci.c
arch/um/drivers/virtio_uml.c
arch/um/kernel/skas/clone.c
arch/x86/um/shared/sysdep/stub_32.h
arch/x86/um/shared/sysdep/stub_64.h
arch/x86/um/stub_segv.c
drivers/cxl/Makefile
drivers/cxl/acpi.c
drivers/cxl/core.c [deleted file]
drivers/cxl/core/Makefile [new file with mode: 0644]
drivers/cxl/core/bus.c [new file with mode: 0644]
drivers/cxl/core/core.h [new file with mode: 0644]
drivers/cxl/core/memdev.c [new file with mode: 0644]
drivers/cxl/core/pmem.c [new file with mode: 0644]
drivers/cxl/core/regs.c [new file with mode: 0644]
drivers/cxl/cxl.h
drivers/cxl/cxlmem.h [new file with mode: 0644]
drivers/cxl/mem.h [deleted file]
drivers/cxl/pci.c
drivers/cxl/pci.h
drivers/cxl/pmem.c
drivers/dax/super.c
drivers/dma-buf/Kconfig
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/acpi-dma.c
drivers/dma/altera-msgdma.c
drivers/dma/at_xdmac.c
drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
drivers/dma/dw-axi-dmac/dw-axi-dmac.h
drivers/dma/dw/idma32.c
drivers/dma/dw/internal.h
drivers/dma/dw/of.c
drivers/dma/dw/pci.c
drivers/dma/dw/platform.c
drivers/dma/ep93xx_dma.c
drivers/dma/fsl-dpaa2-qdma/dpaa2-qdma.c
drivers/dma/hisi_dma.c
drivers/dma/idxd/Makefile
drivers/dma/idxd/bus.c [new file with mode: 0644]
drivers/dma/idxd/cdev.c
drivers/dma/idxd/compat.c [new file with mode: 0644]
drivers/dma/idxd/device.c
drivers/dma/idxd/dma.c
drivers/dma/idxd/idxd.h
drivers/dma/idxd/init.c
drivers/dma/idxd/irq.c
drivers/dma/idxd/registers.h
drivers/dma/idxd/submit.c
drivers/dma/idxd/sysfs.c
drivers/dma/ppc4xx/adma.c
drivers/dma/ptdma/Kconfig [new file with mode: 0644]
drivers/dma/ptdma/Makefile [new file with mode: 0644]
drivers/dma/ptdma/ptdma-debugfs.c [new file with mode: 0644]
drivers/dma/ptdma/ptdma-dev.c [new file with mode: 0644]
drivers/dma/ptdma/ptdma-dmaengine.c [new file with mode: 0644]
drivers/dma/ptdma/ptdma-pci.c [new file with mode: 0644]
drivers/dma/ptdma/ptdma.h [new file with mode: 0644]
drivers/dma/sh/Kconfig
drivers/dma/sh/Makefile
drivers/dma/sh/rz-dmac.c [new file with mode: 0644]
drivers/dma/sh/usb-dmac.c
drivers/dma/sprd-dma.c
drivers/dma/stm32-dma.c
drivers/dma/tegra210-adma.c
drivers/dma/ti/k3-psil-j721e.c
drivers/dma/xilinx/xilinx_dma.c
drivers/dma/xilinx/zynqmp_dma.c
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
drivers/gpu/drm/i915/gt/intel_gt_requests.h
drivers/gpu/drm/mgag200/mgag200_pll.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/panfrost/panfrost_regs.h
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/ttm/ttm_tt.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/infiniband/hw/bnxt_re/ib_verbs.c
drivers/infiniband/hw/hfi1/trace.c
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/qib/qib_sysfs.c
drivers/iommu/Kconfig
drivers/iommu/amd/init.c
drivers/iommu/intel/svm.c
drivers/md/dm-table.c
drivers/md/dm.c
drivers/misc/habanalabs/common/Makefile
drivers/misc/habanalabs/common/command_buffer.c
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/context.c
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/common/hw_queue.c
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/common/mmu/mmu_v1.c
drivers/misc/habanalabs/common/pci/pci.c
drivers/misc/habanalabs/common/state_dump.c [new file with mode: 0644]
drivers/misc/habanalabs/common/sysfs.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/gaudi/gaudi_coresight.c
drivers/misc/habanalabs/gaudi/gaudi_security.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/include/common/cpucp_if.h
drivers/misc/habanalabs/include/common/hl_boot_if.h
drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
drivers/nvdimm/label.c
drivers/nvdimm/label.h
drivers/nvdimm/namespace_devs.c
drivers/nvdimm/nd.h
drivers/nvdimm/pmem.c
drivers/s390/block/Kconfig
drivers/s390/block/Makefile
drivers/s390/block/xpram.c [deleted file]
drivers/s390/char/con3270.c
drivers/s390/char/ctrlchar.c
drivers/s390/char/hmcdrv_ftp.c
drivers/s390/char/sclp.c
drivers/s390/cio/blacklist.c
drivers/s390/cio/device.c
drivers/s390/cio/device_id.c
drivers/s390/crypto/zcrypt_api.c
drivers/s390/crypto/zcrypt_cex2a.c
drivers/s390/crypto/zcrypt_cex2c.c
drivers/s390/crypto/zcrypt_cex4.c
drivers/s390/crypto/zcrypt_msgtype50.c
drivers/s390/crypto/zcrypt_msgtype6.c
drivers/s390/scsi/zfcp_dbf.c
drivers/s390/scsi/zfcp_fsf.c
drivers/s390/scsi/zfcp_qdio.c
drivers/s390/scsi/zfcp_unit.c
drivers/video/fbdev/core/fbmem.c
fs/Kconfig
fs/attr.c
fs/btrfs/disk-io.c
fs/btrfs/ioctl.c
fs/btrfs/misc.h
fs/btrfs/ordered-data.c
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/erofs/super.c
fs/eventpoll.c
fs/ext2/super.c
fs/ext4/super.c
fs/filesystems.c
fs/gfs2/inode.c
fs/hostfs/hostfs_kern.c
fs/io_uring.c
fs/ksmbd/ndr.c
fs/ksmbd/oplock.c
fs/ksmbd/smb2pdu.c
fs/ksmbd/smb_common.c
fs/ksmbd/smb_common.h
fs/ksmbd/smbacl.c
fs/ksmbd/smbacl.h
fs/ksmbd/transport_rdma.c
fs/ksmbd/vfs.c
fs/ksmbd/vfs.h
fs/ksmbd/vfs_cache.c
fs/ksmbd/vfs_cache.h
fs/notify/mark.c
fs/xfs/xfs_super.c
include/asm-generic/div64.h
include/drm/ttm/ttm_tt.h
include/linux/dax.h
include/linux/dmaengine.h
include/linux/eventpoll.h
include/linux/fs.h
include/linux/platform_data/dma-dw.h
include/linux/syscalls.h
include/linux/uio.h
include/uapi/linux/cxl_mem.h
include/uapi/linux/idxd.h
include/uapi/linux/virtio_pcidev.h
include/uapi/misc/habanalabs.h
init/do_mounts.c
init/main.c
ipc/sem.c
kernel/trace/trace.c
kernel/trace/trace_boot.c
kernel/trace/trace_eprobe.c
kernel/trace/trace_events.c
kernel/trace/trace_events_hist.c
kernel/trace/trace_osnoise.c
kernel/trace/trace_output.c
kernel/trace/trace_synth.h
lib/logic_iomem.c
mm/maccess.c
sound/isa/gus/gus_main.c
sound/isa/gus/interwave.c
sound/pci/vx222/vx222.c
sound/soc/codecs/rt5682.c
sound/soc/generic/audio-graph-card.c
sound/soc/intel/boards/Kconfig
sound/soc/mediatek/Kconfig
sound/soc/mediatek/mt8195/mt8195-mt6359-rt1019-rt5682.c
sound/soc/rockchip/rockchip_i2s.c
sound/soc/samsung/s3c24xx_simtec.c
sound/usb/quirks.c
tools/bootconfig/main.c
tools/bootconfig/scripts/ftrace2bconf.sh
tools/bootconfig/test-bootconfig.sh
tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc

index d431e2d..df4afbc 100644 (file)
@@ -128,6 +128,8 @@ Date:               Aug 28, 2020
 KernelVersion: 5.10.0
 Contact:       dmaengine@vger.kernel.org
 Description:   The last executed device administrative command's status/error.
+               The last configuration error is also reported through this field.
+               Writing to it will clear the status.
 
 What:          /sys/bus/dsa/devices/wq<m>.<n>/block_on_fault
 Date:          Oct 27, 2020
@@ -211,6 +213,13 @@ Contact:   dmaengine@vger.kernel.org
 Description:   Indicate whether ATS disable is turned on for the workqueue.
                0 indicates ATS is on, and 1 indicates ATS is off for the workqueue.
 
+What:          /sys/bus/dsa/devices/wq<m>.<n>/occupancy
+Date:          May 25, 2021
+KernelVersion: 5.14.0
+Contact:       dmaengine@vger.kernel.org
+Description:   Show the current number of entries in this WQ if WQ Occupancy
+               Support bit in WQ capabilities is 1.
+
 What:           /sys/bus/dsa/devices/engine<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
index a5c28f6..284e2df 100644 (file)
@@ -215,6 +215,17 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
+Date:           Oct 2021
+KernelVersion:  5.15
+Contact:        ynudelman@habana.ai
+Description:    Gets the state dump occurring on a CS timeout or failure.
+                A state dump is created for debugging each time a CS runs into
+                a problem, before the device is reset.
+                Reading from the node returns the newest state dump available.
+                Writing an integer X discards X state dumps, so that the
+                next read returns the (X+1)-th newest state dump.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
@@ -230,6 +241,14 @@ Description:    Displays a list with information about the currently user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
+Date:           Aug 2021
+KernelVersion:  5.15
+Contact:        ogabbay@kernel.org
+Description:    Allows searching for specific user pointers (user virtual
+                addresses) that are pinned and mapped to DMA addresses, and
+                seeing how each one resolves to its DMA address.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
index 6a79f2e..a1860fc 100644 (file)
@@ -178,7 +178,7 @@ update the boot loader and the kernel image itself as long as the boot
 loader passes the correct initrd file size. If by any chance, the boot
 loader passes a longer size, the kernel fails to find the bootconfig data.
 
-To do this operation, Linux kernel provides "bootconfig" command under
+To do this operation, Linux kernel provides ``bootconfig`` command under
 tools/bootconfig, which allows admin to apply or delete the config file
 to/from initrd image. You can build it by the following command::
 
@@ -196,6 +196,43 @@ To remove the config from the image, you can use -d option as below::
 Then add "bootconfig" on the normal kernel command line to tell the
 kernel to look for the bootconfig at the end of the initrd file.
 
+
+Kernel parameters via Boot Config
+=================================
+
+In addition to the kernel command line, the boot config can be used for
+passing kernel parameters. All the key-value pairs under the ``kernel``
+key will be passed to the kernel cmdline directly. Moreover, the key-value
+pairs under ``init`` will be passed to the init process via the cmdline.
+The parameters are concatenated with the user-given kernel cmdline string
+in the following order, so that the command line parameters can override
+bootconfig parameters (this depends on how the subsystem handles parameters,
+but in general, an earlier parameter will be overwritten by a later one)::
+
+ [bootconfig params][cmdline params] -- [bootconfig init params][cmdline init params]
+
+Here is an example bootconfig file for kernel/init parameters::
+
+ kernel {
+   root = 01234567-89ab-cdef-0123-456789abcd
+ }
+ init {
+  splash
+ }
+
+This will be copied into the kernel cmdline string as follows::
+
+ root="01234567-89ab-cdef-0123-456789abcd" -- splash
+
+If the user gives some other command line like::
+
+ ro bootconfig -- quiet
+
+The final kernel cmdline will be the following::
+
+ root="01234567-89ab-cdef-0123-456789abcd" ro bootconfig -- splash quiet
+
+
 Config File Limitation
 ======================
 
index 828d114..91ba391 100644 (file)
                        support for the idxd driver. By default it is set to
                        true (1).
 
+       idxd.tc_override= [HW]
+                       Format: <bool>
+                       Allow override of default traffic class configuration
+                       for the device. By default it is set to false (0).
+
        ieee754=        [MIPS] Select IEEE Std 754 conformance mode
                        Format: { strict | legacy | 2008 | relaxed }
                        Default: strict
index a4f9fe2..b193ee2 100644 (file)
@@ -24,13 +24,15 @@ properties:
     items:
       - description: Control and Status Register Slave Port
       - description: Descriptor Slave Port
-      - description: Response Slave Port
+      - description: Response Slave Port (Optional)
+    minItems: 2
 
   reg-names:
     items:
       - const: csr
       - const: desc
       - const: resp
+    minItems: 2
 
   interrupts:
     maxItems: 1
diff --git a/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rz-dmac.yaml
new file mode 100644 (file)
index 0000000..7a4f415
--- /dev/null
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/renesas,rz-dmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Renesas RZ/G2L DMA Controller
+
+maintainers:
+  - Biju Das <biju.das.jz@bp.renesas.com>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - renesas,r9a07g044-dmac # RZ/G2{L,LC}
+      - const: renesas,rz-dmac
+
+  reg:
+    items:
+      - description: Control and channel register block
+      - description: DMA extended resource selector block
+
+  interrupts:
+    maxItems: 17
+
+  interrupt-names:
+    items:
+      - const: error
+      - const: ch0
+      - const: ch1
+      - const: ch2
+      - const: ch3
+      - const: ch4
+      - const: ch5
+      - const: ch6
+      - const: ch7
+      - const: ch8
+      - const: ch9
+      - const: ch10
+      - const: ch11
+      - const: ch12
+      - const: ch13
+      - const: ch14
+      - const: ch15
+
+  clocks:
+    items:
+      - description: DMA main clock
+      - description: DMA register access clock
+
+  '#dma-cells':
+    const: 1
+    description:
+      The cell specifies the encoded MID/RID values of the DMAC port
+      connected to the DMA client and the slave channel configuration
+      parameters.
+      bits[0:9] - Specifies MID/RID value
+      bit[10] - Specifies DMA request high enable (HIEN)
+      bit[11] - Specifies DMA request detection type (LVL)
+      bits[12:14] - Specifies DMAACK output mode (AM)
+      bit[15] - Specifies Transfer Mode (TM)
+
+  dma-channels:
+    const: 16
+
+  power-domains:
+    maxItems: 1
+
+  resets:
+    items:
+      - description: Reset for DMA ARESETN reset terminal
+      - description: Reset for DMA RST_ASYNC reset terminal
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - interrupt-names
+  - clocks
+  - '#dma-cells'
+  - dma-channels
+  - power-domains
+  - resets
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/r9a07g044-cpg.h>
+
+    dmac: dma-controller@11820000 {
+        compatible = "renesas,r9a07g044-dmac",
+                     "renesas,rz-dmac";
+        reg = <0x11820000 0x10000>,
+              <0x11830000 0x10000>;
+        interrupts = <GIC_SPI 141 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 125 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 126 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 127 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 128 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 129 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 130 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 131 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 132 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 133 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 134 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 135 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 136 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 137 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 138 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 139 IRQ_TYPE_EDGE_RISING>,
+                     <GIC_SPI 140 IRQ_TYPE_EDGE_RISING>;
+        interrupt-names = "error",
+                          "ch0", "ch1", "ch2", "ch3",
+                          "ch4", "ch5", "ch6", "ch7",
+                          "ch8", "ch9", "ch10", "ch11",
+                          "ch12", "ch13", "ch14", "ch15";
+        clocks = <&cpg CPG_MOD R9A07G044_DMAC_ACLK>,
+                 <&cpg CPG_MOD R9A07G044_DMAC_PCLK>;
+        power-domains = <&cpg>;
+        resets = <&cpg R9A07G044_DMAC_ARESETN>,
+                 <&cpg R9A07G044_DMAC_RST_ASYNC>;
+        #dma-cells = <1>;
+        dma-channels = <16>;
+    };
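
(A hedged aside, not part of the binding or of this merge.) The '#dma-cells'
description above packs five fields into the single specifier cell. A consumer
could assemble that value in C roughly as follows; the macro name and parameter
names here are hypothetical:

    /* Hypothetical helper: pack the single #dma-cells value described above. */
    #define RZ_DMAC_SPEC(mid_rid, hien, lvl, am, tm)                           \
            (((mid_rid) & 0x3ff)        |  /* bits [9:0]: MID/RID value        */ \
             (((hien) & 0x1) << 10)     |  /* bit 10: request high enable, HIEN */ \
             (((lvl) & 0x1) << 11)      |  /* bit 11: detection type, LVL       */ \
             (((am) & 0x7) << 12)       |  /* bits [14:12]: DMAACK mode, AM     */ \
             (((tm) & 0x1) << 15))         /* bit 15: transfer mode, TM         */
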
index 2a5325f..4bf676f 100644 (file)
@@ -40,6 +40,13 @@ description: |
          0x0: FIFO mode with threshold selectable with bit 0-1
          0x1: Direct mode: each DMA request immediately initiates a transfer
               from/to the memory, FIFO is bypassed.
+       -bit 4: alternative DMA request/acknowledge protocol
+         0x0: Use standard DMA ACK management, where ACK signal is maintained
+              up to the removal of request and transfer completion
+         0x1: Use alternative DMA ACK management, where ACK de-assertion does
+              not wait for the de-assertion of the REQuest, ACK is only managed
+              by transfer completion. This must only be used on channels
+              managing transfers for STM32 USART/UART.
 
 
 maintainers:
index 61802a1..d370c98 100644 (file)
@@ -21,6 +21,7 @@ properties:
       - fsl,imx8mn-rpmsg-audio
       - fsl,imx8mm-rpmsg-audio
       - fsl,imx8mp-rpmsg-audio
+      - fsl,imx8ulp-rpmsg-audio
 
   model:
     $ref: /schemas/types.yaml#/definitions/string
index 53e9434..dcf790b 100644 (file)
@@ -130,36 +130,34 @@ additionalProperties: false
 
 examples:
   - |
-    #include <dt-bindings/clock/mt8195-clk.h>
     #include <dt-bindings/interrupt-controller/arm-gic.h>
     #include <dt-bindings/interrupt-controller/irq.h>
-    #include <dt-bindings/power/mt8195-power.h>
 
     afe: mt8195-afe-pcm@10890000 {
         compatible = "mediatek,mt8195-audio";
         reg = <0x10890000 0x10000>;
         interrupts = <GIC_SPI 822 IRQ_TYPE_LEVEL_HIGH 0>;
         mediatek,topckgen = <&topckgen>;
-        power-domains = <&spm MT8195_POWER_DOMAIN_AUDIO>;
+        power-domains = <&spm 7>; //MT8195_POWER_DOMAIN_AUDIO
         clocks = <&clk26m>,
-                 <&topckgen CLK_TOP_APLL1>,
-                 <&topckgen CLK_TOP_APLL2>,
-                 <&topckgen CLK_TOP_APLL12_DIV0>,
-                 <&topckgen CLK_TOP_APLL12_DIV1>,
-                 <&topckgen CLK_TOP_APLL12_DIV2>,
-                 <&topckgen CLK_TOP_APLL12_DIV3>,
-                 <&topckgen CLK_TOP_APLL12_DIV9>,
-                 <&topckgen CLK_TOP_A1SYS_HP_SEL>,
-                 <&topckgen CLK_TOP_AUD_INTBUS_SEL>,
-                 <&topckgen CLK_TOP_AUDIO_H_SEL>,
-                 <&topckgen CLK_TOP_AUDIO_LOCAL_BUS_SEL>,
-                 <&topckgen CLK_TOP_DPTX_M_SEL>,
-                 <&topckgen CLK_TOP_I2SO1_M_SEL>,
-                 <&topckgen CLK_TOP_I2SO2_M_SEL>,
-                 <&topckgen CLK_TOP_I2SI1_M_SEL>,
-                 <&topckgen CLK_TOP_I2SI2_M_SEL>,
-                 <&infracfg_ao CLK_INFRA_AO_AUDIO_26M_B>,
-                 <&scp_adsp CLK_SCP_ADSP_AUDIODSP>;
+                 <&topckgen 163>, //CLK_TOP_APLL1
+                 <&topckgen 166>, //CLK_TOP_APLL2
+                 <&topckgen 233>, //CLK_TOP_APLL12_DIV0
+                 <&topckgen 234>, //CLK_TOP_APLL12_DIV1
+                 <&topckgen 235>, //CLK_TOP_APLL12_DIV2
+                 <&topckgen 236>, //CLK_TOP_APLL12_DIV3
+                 <&topckgen 238>, //CLK_TOP_APLL12_DIV9
+                 <&topckgen 100>, //CLK_TOP_A1SYS_HP_SEL
+                 <&topckgen 33>, //CLK_TOP_AUD_INTBUS_SEL
+                 <&topckgen 34>, //CLK_TOP_AUDIO_H_SEL
+                 <&topckgen 107>, //CLK_TOP_AUDIO_LOCAL_BUS_SEL
+                 <&topckgen 98>, //CLK_TOP_DPTX_M_SEL
+                 <&topckgen 94>, //CLK_TOP_I2SO1_M_SEL
+                 <&topckgen 95>, //CLK_TOP_I2SO2_M_SEL
+                 <&topckgen 96>, //CLK_TOP_I2SI1_M_SEL
+                 <&topckgen 97>, //CLK_TOP_I2SI2_M_SEL
+                 <&infracfg_ao 50>, //CLK_INFRA_AO_AUDIO_26M_B
+                 <&scp_adsp 0>; //CLK_SCP_ADSP_AUDIODSP
         clock-names = "clk26m",
                       "apll1_ck",
                       "apll2_ck",
index 487ce4f..50ebcda 100644 (file)
@@ -36,9 +36,15 @@ CXL Core
 .. kernel-doc:: drivers/cxl/cxl.h
    :internal:
 
-.. kernel-doc:: drivers/cxl/core.c
+.. kernel-doc:: drivers/cxl/core/bus.c
    :doc: cxl core
 
+.. kernel-doc:: drivers/cxl/core/pmem.c
+   :doc: cxl pmem
+
+.. kernel-doc:: drivers/cxl/core/regs.c
+   :doc: cxl registers
+
 External Interfaces
 ===================
 
index d5a73fa..8126bea 100644 (file)
@@ -37,7 +37,7 @@ TTM initialization
     This section is outdated.
 
 Drivers wishing to support TTM must pass a filled :c:type:`ttm_bo_driver
-<ttm_bo_driver>` structure to ttm_bo_device_init, together with an
+<ttm_bo_driver>` structure to ttm_device_init, together with an
 initialized global reference to the memory manager.  The ttm_bo_driver
 structure contains several fields with function pointers for
 initializing the TTM, allocating and freeing memory, waiting for command
index 3a9a7cb..5ec52be 100644 (file)
@@ -985,6 +985,12 @@ S: Supported
 T:     git https://gitlab.freedesktop.org/agd5f/linux.git
 F:     drivers/gpu/drm/amd/pm/powerplay/
 
+AMD PTDMA DRIVER
+M:     Sanjay R Mehta <sanju.mehta@amd.com>
+L:     dmaengine@vger.kernel.org
+S:     Maintained
+F:     drivers/dma/ptdma/
+
 AMD SEATTLE DEVICE TREE SUPPORT
 M:     Brijesh Singh <brijeshkumar.singh@amd.com>
 M:     Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
@@ -2328,14 +2334,14 @@ N:      oxnas
 
 ARM/PALM TREO SUPPORT
 M:     Tomas Cech <sleep_walker@suse.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 W:     http://hackndev.com
 F:     arch/arm/mach-pxa/palmtreo.*
 
 ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
 M:     Marek Vasut <marek.vasut@gmail.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 W:     http://hackndev.com
 F:     arch/arm/mach-pxa/include/mach/palmld.h
@@ -2349,7 +2355,7 @@ F:        arch/arm/mach-pxa/palmtx.c
 
 ARM/PALMZ72 SUPPORT
 M:     Sergey Lapin <slapin@ossfans.org>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 W:     http://hackndev.com
 F:     arch/arm/mach-pxa/palmz72.*
@@ -2519,7 +2525,7 @@ N:        s5pv210
 
 ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
 M:     Andrzej Hajda <a.hajda@samsung.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-media@vger.kernel.org
 S:     Maintained
 F:     drivers/media/platform/s5p-g2d/
@@ -2536,14 +2542,14 @@ ARM/SAMSUNG S5P SERIES JPEG CODEC SUPPORT
 M:     Andrzej Pietrasiewicz <andrzejtp2010@gmail.com>
 M:     Jacek Anaszewski <jacek.anaszewski@gmail.com>
 M:     Sylwester Nawrocki <s.nawrocki@samsung.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-media@vger.kernel.org
 S:     Maintained
 F:     drivers/media/platform/s5p-jpeg/
 
 ARM/SAMSUNG S5P SERIES Multi Format Codec (MFC) SUPPORT
 M:     Andrzej Hajda <a.hajda@samsung.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-media@vger.kernel.org
 S:     Maintained
 F:     drivers/media/platform/s5p-mfc/
@@ -3562,7 +3568,7 @@ BROADCOM BCM5301X ARM ARCHITECTURE
 M:     Hauke Mehrtens <hauke@hauke-m.de>
 M:     Rafał Miłecki <zajec5@gmail.com>
 M:     bcm-kernel-feedback-list@broadcom.com
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/bcm470*
 F:     arch/arm/boot/dts/bcm5301*
@@ -3572,7 +3578,7 @@ F:        arch/arm/mach-bcm/bcm_5301x.c
 BROADCOM BCM53573 ARM ARCHITECTURE
 M:     Rafał Miłecki <rafal@milecki.pl>
 L:     bcm-kernel-feedback-list@broadcom.com
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/boot/dts/bcm47189*
 F:     arch/arm/boot/dts/bcm53573*
@@ -4868,7 +4874,7 @@ CPUIDLE DRIVER - ARM BIG LITTLE
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:     Daniel Lezcano <daniel.lezcano@linaro.org>
 L:     linux-pm@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
 F:     drivers/cpuidle/cpuidle-big_little.c
@@ -4888,14 +4894,14 @@ CPUIDLE DRIVER - ARM PSCI
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:     Sudeep Holla <sudeep.holla@arm.com>
 L:     linux-pm@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     drivers/cpuidle/cpuidle-psci.c
 
 CPUIDLE DRIVER - ARM PSCI PM DOMAIN
 M:     Ulf Hansson <ulf.hansson@linaro.org>
 L:     linux-pm@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     drivers/cpuidle/cpuidle-psci.h
 F:     drivers/cpuidle/cpuidle-psci-domain.c
@@ -7266,7 +7272,7 @@ F:        tools/firewire/
 
 FIRMWARE FRAMEWORK FOR ARMV8-A
 M:     Sudeep Holla <sudeep.holla@arm.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     drivers/firmware/arm_ffa/
 F:     include/linux/arm_ffa.h
@@ -7445,7 +7451,7 @@ F:        include/linux/platform_data/video-imxfb.h
 
 FREESCALE IMX DDR PMU DRIVER
 M:     Frank Li <Frank.li@nxp.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/admin-guide/perf/imx-ddr.rst
 F:     Documentation/devicetree/bindings/perf/fsl-imx-ddr.yaml
@@ -7537,7 +7543,7 @@ F:        drivers/tty/serial/ucc_uart.c
 FREESCALE SOC DRIVERS
 M:     Li Yang <leoyang.li@nxp.com>
 L:     linuxppc-dev@lists.ozlabs.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/misc/fsl,dpaa2-console.yaml
 F:     Documentation/devicetree/bindings/soc/fsl/
@@ -11185,7 +11191,7 @@ F:      drivers/net/wireless/marvell/libertas/
 
 MARVELL MACCHIATOBIN SUPPORT
 M:     Russell King <linux@armlinux.org.uk>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm64/boot/dts/marvell/armada-8040-mcbin.dts
 
@@ -14266,7 +14272,7 @@ F:      drivers/pci/controller/pcie-altera.c
 PCI DRIVER FOR APPLIEDMICRO XGENE
 M:     Toan Le <toan@os.amperecomputing.com>
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/xgene-pci.txt
 F:     drivers/pci/controller/pci-xgene.c
@@ -14274,7 +14280,7 @@ F:      drivers/pci/controller/pci-xgene.c
 PCI DRIVER FOR ARM VERSATILE PLATFORM
 M:     Rob Herring <robh@kernel.org>
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/versatile.yaml
 F:     drivers/pci/controller/pci-versatile.c
@@ -14282,7 +14288,7 @@ F:      drivers/pci/controller/pci-versatile.c
 PCI DRIVER FOR ARMADA 8K
 M:     Thomas Petazzoni <thomas.petazzoni@bootlin.com>
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/pci-armada8k.txt
 F:     drivers/pci/controller/dwc/pcie-armada8k.c
@@ -14300,7 +14306,7 @@ M:      Mingkai Hu <mingkai.hu@nxp.com>
 M:     Roy Zang <roy.zang@nxp.com>
 L:     linuxppc-dev@lists.ozlabs.org
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     drivers/pci/controller/dwc/*layerscape*
 
@@ -14380,7 +14386,7 @@ F:      drivers/pci/controller/pci-tegra.c
 PCI DRIVER FOR NXP LAYERSCAPE GEN4 CONTROLLER
 M:     Hou Zhiqiang <Zhiqiang.Hou@nxp.com>
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/layerscape-pcie-gen4.txt
 F:     drivers/pci/controller/mobiveil/pcie-layerscape-gen4.c
@@ -14415,7 +14421,7 @@ PCI DRIVER FOR TI DRA7XX/J721E
 M:     Kishon Vijay Abraham I <kishon@ti.com>
 L:     linux-omap@vger.kernel.org
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Supported
 F:     Documentation/devicetree/bindings/pci/ti-pci.txt
 F:     drivers/pci/controller/cadence/pci-j721e.c
@@ -14471,7 +14477,7 @@ F:      drivers/pci/controller/pcie-altera-msi.c
 PCI MSI DRIVER FOR APPLIEDMICRO XGENE
 M:     Toan Le <toan@os.amperecomputing.com>
 L:     linux-pci@vger.kernel.org
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/pci/xgene-pci-msi.txt
 F:     drivers/pci/controller/pci-xgene-msi.c
@@ -14988,7 +14994,7 @@ F:      include/linux/dtpm.h
 POWER STATE COORDINATION INTERFACE (PSCI)
 M:     Mark Rutland <mark.rutland@arm.com>
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     drivers/firmware/psci/
 F:     include/linux/psci.h
@@ -15513,7 +15519,7 @@ F:      arch/hexagon/
 
 QUALCOMM HIDMA DRIVER
 M:     Sinan Kaya <okaya@kernel.org>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-arm-msm@vger.kernel.org
 L:     dmaengine@vger.kernel.org
 S:     Supported
@@ -17227,7 +17233,7 @@ SECURE MONITOR CALL(SMC) CALLING CONVENTION (SMCCC)
 M:     Mark Rutland <mark.rutland@arm.com>
 M:     Lorenzo Pieralisi <lorenzo.pieralisi@arm.com>
 M:     Sudeep Holla <sudeep.holla@arm.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     drivers/firmware/smccc/
 F:     include/linux/arm-smccc.h
@@ -17344,7 +17350,7 @@ F:      drivers/media/pci/solo6x10/
 
 SOFTWARE DELEGATED EXCEPTION INTERFACE (SDEI)
 M:     James Morse <james.morse@arm.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/arm/firmware/sdei.txt
 F:     drivers/firmware/arm_sdei.c
@@ -18131,7 +18137,7 @@ F:      drivers/mfd/syscon.c
 SYSTEM CONTROL & POWER/MANAGEMENT INTERFACE (SCPI/SCMI) Message Protocol drivers
 M:     Sudeep Holla <sudeep.holla@arm.com>
 R:     Cristian Marussi <cristian.marussi@arm.com>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/firmware/arm,sc[mp]i.yaml
 F:     drivers/clk/clk-sc[mp]i.c
@@ -18504,7 +18510,7 @@ TEXAS INSTRUMENTS' SYSTEM CONTROL INTERFACE (TISCI) PROTOCOL DRIVER
 M:     Nishanth Menon <nm@ti.com>
 M:     Tero Kristo <kristo@kernel.org>
 M:     Santosh Shilimkar <ssantosh@kernel.org>
-L:     linux-arm-kernel@lists.infradead.org
+L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/arm/keystone/ti,k3-sci-common.yaml
 F:     Documentation/devicetree/bindings/arm/keystone/ti,sci.txt
index 9986302..fc19642 100644 (file)
@@ -124,7 +124,6 @@ config ARM
        select PCI_SYSCALL if PCI
        select PERF_USE_VMALLOC
        select RTC_LIB
-       select SET_FS
        select SYS_SUPPORTS_APM_EMULATION
        select TRACE_IRQFLAGS_SUPPORT if !CPU_V7M
        # Above selects are sorted alphabetically; please add new ones
index 173da68..847c31e 100644 (file)
@@ -308,7 +308,8 @@ $(BOOT_TARGETS): vmlinux
        @$(kecho) '  Kernel: $(boot)/$@ is ready'
 
 $(INSTALL_TARGETS):
-       $(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
+       $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh "$(KERNELRELEASE)" \
+       $(boot)/$(patsubst %install,%Image,$@) System.map "$(INSTALL_PATH)"
 
 PHONY += vdso_install
 vdso_install:
index 0b3cd7a..54a09f9 100644 (file)
@@ -96,23 +96,11 @@ $(obj)/bootp/bootp: $(obj)/zImage initrd FORCE
 $(obj)/bootpImage: $(obj)/bootp/bootp FORCE
        $(call if_changed,objcopy)
 
-PHONY += initrd install zinstall uinstall
+PHONY += initrd
 initrd:
        @test "$(INITRD_PHYS)" != "" || \
        (echo This machine does not support INITRD; exit -1)
        @test "$(INITRD)" != "" || \
        (echo You must specify INITRD; exit -1)
 
-install:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-       $(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-       $(obj)/zImage System.map "$(INSTALL_PATH)"
-
-uinstall:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh "$(KERNELRELEASE)" \
-       $(obj)/uImage System.map "$(INSTALL_PATH)"
-
 subdir-            := bootp compressed dts
index 9d91ae1..91265e7 100644 (file)
@@ -85,6 +85,8 @@ compress-$(CONFIG_KERNEL_LZ4)  = lz4
 libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o
 
 ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y)
+CFLAGS_REMOVE_atags_to_fdt.o += -Wframe-larger-than=${CONFIG_FRAME_WARN}
+CFLAGS_atags_to_fdt.o += -Wframe-larger-than=1280
 OBJS   += $(libfdt_objs) atags_to_fdt.o
 endif
 ifeq ($(CONFIG_USE_OF),y)
index 595e538..4b69cf8 100644 (file)
@@ -52,17 +52,6 @@ static inline uint32_t __div64_32(uint64_t *n, uint32_t base)
 
 #else
 
-/*
- * gcc versions earlier than 4.0 are simply too problematic for the
- * __div64_const32() code in asm-generic/div64.h. First there is
- * gcc PR 15089 that tend to trig on more complex constructs, spurious
- * .global __udivsi3 are inserted even if none of those symbols are
- * referenced in the generated code, and those gcc versions are not able
- * to do constant propagation on long long values anyway.
- */
-
-#define __div64_const32_is_OK (__GNUC__ >= 4)
-
 static inline uint64_t __arch_xprod_64(uint64_t m, uint64_t n, bool bias)
 {
        unsigned long long res;
index c50e383..f3bb8a2 100644 (file)
@@ -2,10 +2,6 @@
 #ifndef _ARCH_ARM_GPIO_H
 #define _ARCH_ARM_GPIO_H
 
-#if CONFIG_ARCH_NR_GPIO > 0
-#define ARCH_NR_GPIOS CONFIG_ARCH_NR_GPIO
-#endif
-
 /* Note: this may rely upon the value of ARCH_NR_GPIOS set in mach/gpio.h */
 #include <asm-generic/gpio.h>
 
index 91d6b78..93051e2 100644 (file)
@@ -19,7 +19,6 @@ struct pt_regs {
 struct svc_pt_regs {
        struct pt_regs regs;
        u32 dacr;
-       u32 addr_limit;
 };
 
 #define to_svc_pt_regs(r) container_of(r, struct svc_pt_regs, regs)
index fd02761..24c19d6 100644 (file)
@@ -22,7 +22,21 @@ extern const unsigned long sys_call_table[];
 static inline int syscall_get_nr(struct task_struct *task,
                                 struct pt_regs *regs)
 {
-       return task_thread_info(task)->syscall;
+       if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT))
+               return task_thread_info(task)->abi_syscall;
+
+       return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
+}
+
+static inline bool __in_oabi_syscall(struct task_struct *task)
+{
+       return IS_ENABLED(CONFIG_OABI_COMPAT) &&
+               (task_thread_info(task)->abi_syscall & __NR_OABI_SYSCALL_BASE);
+}
+
+static inline bool in_oabi_syscall(void)
+{
+       return __in_oabi_syscall(current);
 }
 
 static inline void syscall_rollback(struct task_struct *task,
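
As a hedged illustration (not code from this merge), the helpers added above
let callers read the syscall number and the ABI flavour separately now that
both are encoded in thread_info->abi_syscall; the function below is hypothetical:

    #include <linux/printk.h>
    #include <linux/sched.h>
    #include <asm/ptrace.h>
    #include <asm/syscall.h>

    /* Hypothetical: report which syscall a traced task is currently in. */
    static void report_syscall(struct task_struct *task, struct pt_regs *regs)
    {
            int nr = syscall_get_nr(task, regs);    /* ABI bits masked off */

            if (nr < 0)
                    return;                         /* not inside a syscall */

            pr_debug("%s: syscall %d via %s ABI\n", task->comm, nr,
                     __in_oabi_syscall(task) ? "OABI" : "EABI");
    }
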
index a02799b..9a18da3 100644 (file)
@@ -31,8 +31,6 @@ struct task_struct;
 
 #include <asm/types.h>
 
-typedef unsigned long mm_segment_t;
-
 struct cpu_context_save {
        __u32   r4;
        __u32   r5;
@@ -54,7 +52,6 @@ struct cpu_context_save {
 struct thread_info {
        unsigned long           flags;          /* low level flags */
        int                     preempt_count;  /* 0 => preemptable, <0 => bug */
-       mm_segment_t            addr_limit;     /* address limit */
        struct task_struct      *task;          /* main task structure */
        __u32                   cpu;            /* cpu */
        __u32                   cpu_domain;     /* cpu domain */
@@ -62,7 +59,7 @@ struct thread_info {
        unsigned long           stack_canary;
 #endif
        struct cpu_context_save cpu_context;    /* cpu context */
-       __u32                   syscall;        /* syscall number */
+       __u32                   abi_syscall;    /* ABI type and syscall nr */
        __u8                    used_cp[16];    /* thread used copro */
        unsigned long           tp_value[2];    /* TLS registers */
        union fp_state          fpstate __attribute__((aligned(8)));
@@ -77,7 +74,6 @@ struct thread_info {
        .task           = &tsk,                                         \
        .flags          = 0,                                            \
        .preempt_count  = INIT_PREEMPT_COUNT,                           \
-       .addr_limit     = KERNEL_DS,                                    \
 }
 
 /*
index e6eb7a2..6451a43 100644 (file)
         * if \disable is set.
         */
        .macro  uaccess_entry, tsk, tmp0, tmp1, tmp2, disable
-       ldr     \tmp1, [\tsk, #TI_ADDR_LIMIT]
-       ldr     \tmp2, =TASK_SIZE
-       str     \tmp2, [\tsk, #TI_ADDR_LIMIT]
  DACR( mrc     p15, 0, \tmp0, c3, c0, 0)
  DACR( str     \tmp0, [sp, #SVC_DACR])
-       str     \tmp1, [sp, #SVC_ADDR_LIMIT]
        .if \disable && IS_ENABLED(CONFIG_CPU_SW_DOMAIN_PAN)
        /* kernel=client, user=no access */
        mov     \tmp2, #DACR_UACCESS_DISABLE
 
        /* Restore the user access state previously saved by uaccess_entry */
        .macro  uaccess_exit, tsk, tmp0, tmp1
-       ldr     \tmp1, [sp, #SVC_ADDR_LIMIT]
  DACR( ldr     \tmp0, [sp, #SVC_DACR])
-       str     \tmp1, [\tsk, #TI_ADDR_LIMIT]
  DACR( mcr     p15, 0, \tmp0, c3, c0, 0)
        .endm
 
index a13d902..084d1c0 100644 (file)
@@ -52,32 +52,8 @@ static __always_inline void uaccess_restore(unsigned int flags)
 extern int __get_user_bad(void);
 extern int __put_user_bad(void);
 
-/*
- * Note that this is actually 0x1,0000,0000
- */
-#define KERNEL_DS      0x00000000
-
 #ifdef CONFIG_MMU
 
-#define USER_DS                TASK_SIZE
-#define get_fs()       (current_thread_info()->addr_limit)
-
-static inline void set_fs(mm_segment_t fs)
-{
-       current_thread_info()->addr_limit = fs;
-
-       /*
-        * Prevent a mispredicted conditional call to set_fs from forwarding
-        * the wrong address limit to access_ok under speculation.
-        */
-       dsb(nsh);
-       isb();
-
-       modify_domain(DOMAIN_KERNEL, fs ? DOMAIN_CLIENT : DOMAIN_MANAGER);
-}
-
-#define uaccess_kernel()       (get_fs() == KERNEL_DS)
-
 /*
  * We use 33-bit arithmetic here.  Success returns zero, failure returns
  * addr_limit.  We take advantage that addr_limit will be zero for KERNEL_DS,
@@ -89,7 +65,7 @@ static inline void set_fs(mm_segment_t fs)
        __asm__(".syntax unified\n" \
                "adds %1, %2, %3; sbcscc %1, %1, %0; movcc %0, #0" \
                : "=&r" (flag), "=&r" (roksum) \
-               : "r" (addr), "Ir" (size), "0" (current_thread_info()->addr_limit) \
+               : "r" (addr), "Ir" (size), "0" (TASK_SIZE) \
                : "cc"); \
        flag; })
 
@@ -120,7 +96,7 @@ static inline void __user *__uaccess_mask_range_ptr(const void __user *ptr,
        "       subshs  %1, %1, %2\n"
        "       movlo   %0, #0\n"
        : "+r" (safe_ptr), "=&r" (tmp)
-       : "r" (size), "r" (current_thread_info()->addr_limit)
+       : "r" (size), "r" (TASK_SIZE)
        : "cc");
 
        csdb();
@@ -194,7 +170,7 @@ extern int __get_user_64t_4(void *);
 
 #define __get_user_check(x, p)                                         \
        ({                                                              \
-               unsigned long __limit = current_thread_info()->addr_limit - 1; \
+               unsigned long __limit = TASK_SIZE - 1; \
                register typeof(*(p)) __user *__p asm("r0") = (p);      \
                register __inttype(x) __r2 asm("r2");                   \
                register unsigned long __l asm("r1") = __limit;         \
@@ -245,7 +221,7 @@ extern int __put_user_8(void *, unsigned long long);
 
 #define __put_user_check(__pu_val, __ptr, __err, __s)                  \
        ({                                                              \
-               unsigned long __limit = current_thread_info()->addr_limit - 1; \
+               unsigned long __limit = TASK_SIZE - 1; \
                register typeof(__pu_val) __r2 asm("r2") = __pu_val;    \
                register const void __user *__p asm("r0") = __ptr;      \
                register unsigned long __l asm("r1") = __limit;         \
@@ -262,19 +238,8 @@ extern int __put_user_8(void *, unsigned long long);
 
 #else /* CONFIG_MMU */
 
-/*
- * uClinux has only one addr space, so has simplified address limits.
- */
-#define USER_DS                        KERNEL_DS
-
-#define uaccess_kernel()       (true)
 #define __addr_ok(addr)                ((void)(addr), 1)
 #define __range_ok(addr, size) ((void)(addr), 0)
-#define get_fs()               (KERNEL_DS)
-
-static inline void set_fs(mm_segment_t fs)
-{
-}
 
 #define get_user(x, p) __get_user(x, p)
 #define __put_user_check __put_user_nocheck
@@ -283,9 +248,6 @@ static inline void set_fs(mm_segment_t fs)
 
 #define access_ok(addr, size)  (__range_ok(addr, size) == 0)
 
-#define user_addr_max() \
-       (uaccess_kernel() ? ~0UL : get_fs())
-
 #ifdef CONFIG_CPU_SPECTRE
 /*
  * When mitigating Spectre variant 1, it is not worth fixing the non-
@@ -308,11 +270,11 @@ static inline void set_fs(mm_segment_t fs)
 #define __get_user(x, ptr)                                             \
 ({                                                                     \
        long __gu_err = 0;                                              \
-       __get_user_err((x), (ptr), __gu_err);                           \
+       __get_user_err((x), (ptr), __gu_err, TUSER());                  \
        __gu_err;                                                       \
 })
 
-#define __get_user_err(x, ptr, err)                                    \
+#define __get_user_err(x, ptr, err, __t)                               \
 do {                                                                   \
        unsigned long __gu_addr = (unsigned long)(ptr);                 \
        unsigned long __gu_val;                                         \
@@ -321,18 +283,19 @@ do {                                                                      \
        might_fault();                                                  \
        __ua_flags = uaccess_save_and_enable();                         \
        switch (sizeof(*(ptr))) {                                       \
-       case 1: __get_user_asm_byte(__gu_val, __gu_addr, err);  break;  \
-       case 2: __get_user_asm_half(__gu_val, __gu_addr, err);  break;  \
-       case 4: __get_user_asm_word(__gu_val, __gu_addr, err);  break;  \
+       case 1: __get_user_asm_byte(__gu_val, __gu_addr, err, __t); break;      \
+       case 2: __get_user_asm_half(__gu_val, __gu_addr, err, __t); break;      \
+       case 4: __get_user_asm_word(__gu_val, __gu_addr, err, __t); break;      \
        default: (__gu_val) = __get_user_bad();                         \
        }                                                               \
        uaccess_restore(__ua_flags);                                    \
        (x) = (__typeof__(*(ptr)))__gu_val;                             \
 } while (0)
+#endif
 
 #define __get_user_asm(x, addr, err, instr)                    \
        __asm__ __volatile__(                                   \
-       "1:     " TUSER(instr) " %1, [%2], #0\n"                \
+       "1:     " instr " %1, [%2], #0\n"                       \
        "2:\n"                                                  \
        "       .pushsection .text.fixup,\"ax\"\n"              \
        "       .align  2\n"                                    \
@@ -348,40 +311,38 @@ do {                                                                      \
        : "r" (addr), "i" (-EFAULT)                             \
        : "cc")
 
-#define __get_user_asm_byte(x, addr, err)                      \
-       __get_user_asm(x, addr, err, ldrb)
+#define __get_user_asm_byte(x, addr, err, __t)                 \
+       __get_user_asm(x, addr, err, "ldrb" __t)
 
 #if __LINUX_ARM_ARCH__ >= 6
 
-#define __get_user_asm_half(x, addr, err)                      \
-       __get_user_asm(x, addr, err, ldrh)
+#define __get_user_asm_half(x, addr, err, __t)                 \
+       __get_user_asm(x, addr, err, "ldrh" __t)
 
 #else
 
 #ifndef __ARMEB__
-#define __get_user_asm_half(x, __gu_addr, err)                 \
+#define __get_user_asm_half(x, __gu_addr, err, __t)            \
 ({                                                             \
        unsigned long __b1, __b2;                               \
-       __get_user_asm_byte(__b1, __gu_addr, err);              \
-       __get_user_asm_byte(__b2, __gu_addr + 1, err);          \
+       __get_user_asm_byte(__b1, __gu_addr, err, __t);         \
+       __get_user_asm_byte(__b2, __gu_addr + 1, err, __t);     \
        (x) = __b1 | (__b2 << 8);                               \
 })
 #else
-#define __get_user_asm_half(x, __gu_addr, err)                 \
+#define __get_user_asm_half(x, __gu_addr, err, __t)            \
 ({                                                             \
        unsigned long __b1, __b2;                               \
-       __get_user_asm_byte(__b1, __gu_addr, err);              \
-       __get_user_asm_byte(__b2, __gu_addr + 1, err);          \
+       __get_user_asm_byte(__b1, __gu_addr, err, __t);         \
+       __get_user_asm_byte(__b2, __gu_addr + 1, err, __t);     \
        (x) = (__b1 << 8) | __b2;                               \
 })
 #endif
 
 #endif /* __LINUX_ARM_ARCH__ >= 6 */
 
-#define __get_user_asm_word(x, addr, err)                      \
-       __get_user_asm(x, addr, err, ldr)
-#endif
-
+#define __get_user_asm_word(x, addr, err, __t)                 \
+       __get_user_asm(x, addr, err, "ldr" __t)
 
 #define __put_user_switch(x, ptr, __err, __fn)                         \
        do {                                                            \
@@ -425,7 +386,7 @@ do {                                                                        \
 #define __put_user_nocheck(x, __pu_ptr, __err, __size)                 \
        do {                                                            \
                unsigned long __pu_addr = (unsigned long)__pu_ptr;      \
-               __put_user_nocheck_##__size(x, __pu_addr, __err);       \
+               __put_user_nocheck_##__size(x, __pu_addr, __err, TUSER());\
        } while (0)
 
 #define __put_user_nocheck_1 __put_user_asm_byte
@@ -433,9 +394,11 @@ do {                                                                       \
 #define __put_user_nocheck_4 __put_user_asm_word
 #define __put_user_nocheck_8 __put_user_asm_dword
 
+#endif /* !CONFIG_CPU_SPECTRE */
+
 #define __put_user_asm(x, __pu_addr, err, instr)               \
        __asm__ __volatile__(                                   \
-       "1:     " TUSER(instr) " %1, [%2], #0\n"                \
+       "1:     " instr " %1, [%2], #0\n"               \
        "2:\n"                                                  \
        "       .pushsection .text.fixup,\"ax\"\n"              \
        "       .align  2\n"                                    \
@@ -450,36 +413,36 @@ do {                                                                      \
        : "r" (x), "r" (__pu_addr), "i" (-EFAULT)               \
        : "cc")
 
-#define __put_user_asm_byte(x, __pu_addr, err)                 \
-       __put_user_asm(x, __pu_addr, err, strb)
+#define __put_user_asm_byte(x, __pu_addr, err, __t)            \
+       __put_user_asm(x, __pu_addr, err, "strb" __t)
 
 #if __LINUX_ARM_ARCH__ >= 6
 
-#define __put_user_asm_half(x, __pu_addr, err)                 \
-       __put_user_asm(x, __pu_addr, err, strh)
+#define __put_user_asm_half(x, __pu_addr, err, __t)            \
+       __put_user_asm(x, __pu_addr, err, "strh" __t)
 
 #else
 
 #ifndef __ARMEB__
-#define __put_user_asm_half(x, __pu_addr, err)                 \
+#define __put_user_asm_half(x, __pu_addr, err, __t)            \
 ({                                                             \
        unsigned long __temp = (__force unsigned long)(x);      \
-       __put_user_asm_byte(__temp, __pu_addr, err);            \
-       __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err);   \
+       __put_user_asm_byte(__temp, __pu_addr, err, __t);       \
+       __put_user_asm_byte(__temp >> 8, __pu_addr + 1, err, __t);\
 })
 #else
-#define __put_user_asm_half(x, __pu_addr, err)                 \
+#define __put_user_asm_half(x, __pu_addr, err, __t)            \
 ({                                                             \
        unsigned long __temp = (__force unsigned long)(x);      \
-       __put_user_asm_byte(__temp >> 8, __pu_addr, err);       \
-       __put_user_asm_byte(__temp, __pu_addr + 1, err);        \
+       __put_user_asm_byte(__temp >> 8, __pu_addr, err, __t);  \
+       __put_user_asm_byte(__temp, __pu_addr + 1, err, __t);   \
 })
 #endif
 
 #endif /* __LINUX_ARM_ARCH__ >= 6 */
 
-#define __put_user_asm_word(x, __pu_addr, err)                 \
-       __put_user_asm(x, __pu_addr, err, str)
+#define __put_user_asm_word(x, __pu_addr, err, __t)            \
+       __put_user_asm(x, __pu_addr, err, "str" __t)
 
 #ifndef __ARMEB__
 #define        __reg_oper0     "%R2"
@@ -489,12 +452,12 @@ do {                                                                      \
 #define        __reg_oper1     "%R2"
 #endif
 
-#define __put_user_asm_dword(x, __pu_addr, err)                        \
+#define __put_user_asm_dword(x, __pu_addr, err, __t)           \
        __asm__ __volatile__(                                   \
- ARM(  "1:     " TUSER(str) "  " __reg_oper1 ", [%1], #4\n"    ) \
- ARM(  "2:     " TUSER(str) "  " __reg_oper0 ", [%1]\n"        ) \
- THUMB(        "1:     " TUSER(str) "  " __reg_oper1 ", [%1]\n"        ) \
- THUMB(        "2:     " TUSER(str) "  " __reg_oper0 ", [%1, #4]\n"    ) \
+ ARM(  "1:     str" __t "      " __reg_oper1 ", [%1], #4\n"  ) \
+ ARM(  "2:     str" __t "      " __reg_oper0 ", [%1]\n"      ) \
+ THUMB(        "1:     str" __t "      " __reg_oper1 ", [%1]\n"      ) \
+ THUMB(        "2:     str" __t "      " __reg_oper0 ", [%1, #4]\n"  ) \
        "3:\n"                                                  \
        "       .pushsection .text.fixup,\"ax\"\n"              \
        "       .align  2\n"                                    \
@@ -510,7 +473,49 @@ do {                                                                       \
        : "r" (x), "i" (-EFAULT)                                \
        : "cc")
 
-#endif /* !CONFIG_CPU_SPECTRE */
+#define HAVE_GET_KERNEL_NOFAULT
+
+#define __get_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       const type *__pk_ptr = (src);                                   \
+       unsigned long __src = (unsigned long)(__pk_ptr);                \
+       type __val;                                                     \
+       int __err = 0;                                                  \
+       switch (sizeof(type)) {                                         \
+       case 1: __get_user_asm_byte(__val, __src, __err, ""); break;    \
+       case 2: __get_user_asm_half(__val, __src, __err, ""); break;    \
+       case 4: __get_user_asm_word(__val, __src, __err, ""); break;    \
+       case 8: {                                                       \
+               u32 *__v32 = (u32*)&__val;                              \
+               __get_user_asm_word(__v32[0], __src, __err, "");        \
+               if (__err)                                              \
+                       break;                                          \
+               __get_user_asm_word(__v32[1], __src+4, __err, "");      \
+               break;                                                  \
+       }                                                               \
+       default: __err = __get_user_bad(); break;                       \
+       }                                                               \
+       *(type *)(dst) = __val;                                         \
+       if (__err)                                                      \
+               goto err_label;                                         \
+} while (0)
+
+#define __put_kernel_nofault(dst, src, type, err_label)                        \
+do {                                                                   \
+       const type *__pk_ptr = (dst);                                   \
+       unsigned long __dst = (unsigned long)__pk_ptr;                  \
+       int __err = 0;                                                  \
+       type __val = *(type *)src;                                      \
+       switch (sizeof(type)) {                                         \
+       case 1: __put_user_asm_byte(__val, __dst, __err, ""); break;    \
+       case 2: __put_user_asm_half(__val, __dst, __err, ""); break;    \
+       case 4: __put_user_asm_word(__val, __dst, __err, ""); break;    \
+       case 8: __put_user_asm_dword(__val, __dst, __err, ""); break;   \
+       default: __err = __put_user_bad(); break;                       \
+       }                                                               \
+       if (__err)                                                      \
+               goto err_label;                                         \
+} while (0)
 
 #ifdef CONFIG_MMU
 extern unsigned long __must_check
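
With HAVE_GET_KERNEL_NOFAULT defined, the generic get_kernel_nofault()/copy_from_kernel_nofault() wrappers expand to the __get/put_kernel_nofault() macros above, so probing kernel memory no longer needs a set_fs(KERNEL_DS) window. A minimal sketch of a caller, using only the generic wrapper from <linux/uaccess.h> (the helper name below is hypothetical, not part of this series):

	#include <linux/kernel.h>
	#include <linux/uaccess.h>

	/* Read one kernel word without oopsing on a bad address:
	 * get_kernel_nofault() returns 0 on success, -EFAULT on a fault. */
	static void dump_kernel_word(const unsigned long *addr)
	{
		unsigned long val;

		if (get_kernel_nofault(val, addr))
			pr_info("%px: <fault>\n", addr);
		else
			pr_info("%px: %08lx\n", addr, val);
	}

This is the same pattern the dump_mem() and dump_instr() conversions in traps.c further down switch to.
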
index 1e2c3eb..ce96891 100644 (file)
@@ -24,10 +24,6 @@ __asm__(".syntax unified");
 
 #ifdef CONFIG_THUMB2_KERNEL
 
-#if __GNUC__ < 4
-#error Thumb-2 kernel requires gcc >= 4
-#endif
-
 /* The CPSR bit describing the instruction set (Thumb) */
 #define PSR_ISETSTATE  PSR_T_BIT
 
index ae7749e..a114991 100644 (file)
@@ -15,6 +15,7 @@
 #define _UAPI__ASM_ARM_UNISTD_H
 
 #define __NR_OABI_SYSCALL_BASE 0x900000
+#define __NR_SYSCALL_MASK      0x0fffff
 
 #if defined(__thumb__) || defined(__ARM_EABI__)
 #define __NR_SYSCALL_BASE      0
index 6494470..a646a3f 100644 (file)
@@ -43,11 +43,11 @@ int main(void)
   BLANK();
   DEFINE(TI_FLAGS,             offsetof(struct thread_info, flags));
   DEFINE(TI_PREEMPT,           offsetof(struct thread_info, preempt_count));
-  DEFINE(TI_ADDR_LIMIT,                offsetof(struct thread_info, addr_limit));
   DEFINE(TI_TASK,              offsetof(struct thread_info, task));
   DEFINE(TI_CPU,               offsetof(struct thread_info, cpu));
   DEFINE(TI_CPU_DOMAIN,                offsetof(struct thread_info, cpu_domain));
   DEFINE(TI_CPU_SAVE,          offsetof(struct thread_info, cpu_context));
+  DEFINE(TI_ABI_SYSCALL,       offsetof(struct thread_info, abi_syscall));
   DEFINE(TI_USED_CP,           offsetof(struct thread_info, used_cp));
   DEFINE(TI_TP_VALUE,          offsetof(struct thread_info, tp_value));
   DEFINE(TI_FPSTATE,           offsetof(struct thread_info, fpstate));
@@ -88,7 +88,6 @@ int main(void)
   DEFINE(S_OLD_R0,             offsetof(struct pt_regs, ARM_ORIG_r0));
   DEFINE(PT_REGS_SIZE,         sizeof(struct pt_regs));
   DEFINE(SVC_DACR,             offsetof(struct svc_pt_regs, dacr));
-  DEFINE(SVC_ADDR_LIMIT,       offsetof(struct svc_pt_regs, addr_limit));
   DEFINE(SVC_REGS_SIZE,                sizeof(struct svc_pt_regs));
   BLANK();
   DEFINE(SIGFRAME_RC3_OFFSET,  offsetof(struct sigframe, retcode[3]));
index 7f0b7ab..d9c99db 100644 (file)
@@ -49,10 +49,6 @@ __ret_fast_syscall:
  UNWIND(.fnstart       )
  UNWIND(.cantunwind    )
        disable_irq_notrace                     @ disable interrupts
-       ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       ldr     r1, =TASK_SIZE
-       cmp     r2, r1
-       blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
        movs    r1, r1, lsl #16
        bne     fast_work_pending
@@ -87,10 +83,6 @@ __ret_fast_syscall:
        bl      do_rseq_syscall
 #endif
        disable_irq_notrace                     @ disable interrupts
-       ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       ldr     r1, =TASK_SIZE
-       cmp     r2, r1
-       blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]            @ re-check for syscall tracing
        movs    r1, r1, lsl #16
        beq     no_work_pending
@@ -129,10 +121,6 @@ ret_slow_syscall:
 #endif
        disable_irq_notrace                     @ disable interrupts
 ENTRY(ret_to_user_from_irq)
-       ldr     r2, [tsk, #TI_ADDR_LIMIT]
-       ldr     r1, =TASK_SIZE
-       cmp     r2, r1
-       blne    addr_limit_check_failed
        ldr     r1, [tsk, #TI_FLAGS]
        movs    r1, r1, lsl #16
        bne     slow_work_pending
@@ -226,6 +214,7 @@ ENTRY(vector_swi)
        /* saved_psr and saved_pc are now dead */
 
        uaccess_disable tbl
+       get_thread_info tsk
 
        adr     tbl, sys_call_table             @ load syscall table pointer
 
@@ -237,13 +226,17 @@ ENTRY(vector_swi)
         * get the old ABI syscall table address.
         */
        bics    r10, r10, #0xff000000
+       strne   r10, [tsk, #TI_ABI_SYSCALL]
+       streq   scno, [tsk, #TI_ABI_SYSCALL]
        eorne   scno, r10, #__NR_OABI_SYSCALL_BASE
        ldrne   tbl, =sys_oabi_call_table
 #elif !defined(CONFIG_AEABI)
        bic     scno, scno, #0xff000000         @ mask off SWI op-code
+       str     scno, [tsk, #TI_ABI_SYSCALL]
        eor     scno, scno, #__NR_SYSCALL_BASE  @ check OS number
+#else
+       str     scno, [tsk, #TI_ABI_SYSCALL]
 #endif
-       get_thread_info tsk
        /*
         * Reload the registers that may have been corrupted on entry to
         * the syscall assembly (by tracing or context tracking.)
@@ -288,7 +281,6 @@ ENDPROC(vector_swi)
         * context switches, and waiting for our parent to respond.
         */
 __sys_trace:
-       mov     r1, scno
        add     r0, sp, #S_OFF
        bl      syscall_trace_enter
        mov     scno, r0
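
With the syscall number now stashed in thread_info->abi_syscall on entry, the tracing and seccomp paths read it back through the asm/syscall.h accessors instead of receiving it as a register argument. An approximate sketch of the accessor this relies on (the real definition lives in arch/arm/include/asm/syscall.h and additionally special-cases pure-EABI builds):

	/* Approximation only: the low bits of abi_syscall hold the syscall
	 * number, masked with __NR_SYSCALL_MASK so OABI- and EABI-encoded
	 * numbers look the same to generic code. */
	static inline int syscall_get_nr(struct task_struct *task,
					 struct pt_regs *regs)
	{
		return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK;
	}
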
index bb5ad8a..0e2d305 100644 (file)
@@ -106,7 +106,7 @@ void __show_regs(struct pt_regs *regs)
        unsigned long flags;
        char buf[64];
 #ifndef CONFIG_CPU_V7M
-       unsigned int domain, fs;
+       unsigned int domain;
 #ifdef CONFIG_CPU_SW_DOMAIN_PAN
        /*
         * Get the domain register for the parent context. In user
@@ -115,14 +115,11 @@ void __show_regs(struct pt_regs *regs)
         */
        if (user_mode(regs)) {
                domain = DACR_UACCESS_ENABLE;
-               fs = get_fs();
        } else {
                domain = to_svc_pt_regs(regs)->dacr;
-               fs = to_svc_pt_regs(regs)->addr_limit;
        }
 #else
        domain = get_domain();
-       fs = get_fs();
 #endif
 #endif
 
@@ -158,8 +155,6 @@ void __show_regs(struct pt_regs *regs)
                if ((domain & domain_mask(DOMAIN_USER)) ==
                    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
                        segment = "none";
-               else if (fs == KERNEL_DS)
-                       segment = "kernel";
                else
                        segment = "user";
 
index b008859..43b963e 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/tracehook.h>
 #include <linux/unistd.h>
 
+#include <asm/syscall.h>
 #include <asm/traps.h>
 
 #define CREATE_TRACE_POINTS
@@ -785,7 +786,8 @@ long arch_ptrace(struct task_struct *child, long request,
                        break;
 
                case PTRACE_SET_SYSCALL:
-                       task_thread_info(child)->syscall = data;
+                       task_thread_info(child)->abi_syscall = data &
+                                                       __NR_SYSCALL_MASK;
                        ret = 0;
                        break;
 
@@ -844,14 +846,14 @@ static void tracehook_report_syscall(struct pt_regs *regs,
        if (dir == PTRACE_SYSCALL_EXIT)
                tracehook_report_syscall_exit(regs, 0);
        else if (tracehook_report_syscall_entry(regs))
-               current_thread_info()->syscall = -1;
+               current_thread_info()->abi_syscall = -1;
 
        regs->ARM_ip = ip;
 }
 
-asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
+asmlinkage int syscall_trace_enter(struct pt_regs *regs)
 {
-       current_thread_info()->syscall = scno;
+       int scno;
 
        if (test_thread_flag(TIF_SYSCALL_TRACE))
                tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
@@ -862,11 +864,11 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno)
                return -1;
 #else
        /* XXX: remove this once OABI gets fixed */
-       secure_computing_strict(current_thread_info()->syscall);
+       secure_computing_strict(syscall_get_nr(current, regs));
 #endif
 
        /* Tracer or seccomp may have changed syscall. */
-       scno = current_thread_info()->syscall;
+       scno = syscall_get_nr(current, regs);
 
        if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
                trace_sys_enter(regs, scno);
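
The PTRACE_SET_SYSCALL change above masks whatever a tracer writes with __NR_SYSCALL_MASK before storing it in abi_syscall; from userspace the request itself is unchanged. A hedged sketch of a tracer using it (PTRACE_SET_SYSCALL is an ARM-specific request; the fallback define is an assumption for toolchain headers that do not provide it):

	#include <sys/ptrace.h>
	#include <sys/types.h>
	#include <asm/unistd.h>

	#ifndef PTRACE_SET_SYSCALL
	#define PTRACE_SET_SYSCALL 23	/* ARM-specific ptrace request */
	#endif

	/* Hypothetical tracer snippet: rewrite the syscall a stopped ARM
	 * tracee is about to enter.  The kernel now masks this value with
	 * __NR_SYSCALL_MASK before storing it in abi_syscall. */
	static long force_getpid(pid_t pid)
	{
		return ptrace(PTRACE_SET_SYSCALL, pid, 0, __NR_getpid);
	}
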
index 4e0dcff..d0a800b 100644 (file)
@@ -669,14 +669,6 @@ struct page *get_signal_page(void)
        return page;
 }
 
-/* Defer to generic check */
-asmlinkage void addr_limit_check_failed(void)
-{
-#ifdef CONFIG_MMU
-       addr_limit_user_check();
-#endif
-}
-
 #ifdef CONFIG_DEBUG_RSEQ
 asmlinkage void do_rseq_syscall(struct pt_regs *regs)
 {
index 075a2e0..68112c1 100644 (file)
 #include <linux/socket.h>
 #include <linux/net.h>
 #include <linux/ipc.h>
+#include <linux/ipc_namespace.h>
 #include <linux/uaccess.h>
 #include <linux/slab.h>
 
+#include <asm/syscall.h>
+
 struct oldabi_stat64 {
        unsigned long long st_dev;
        unsigned int    __pad1;
@@ -191,60 +194,87 @@ struct oabi_flock64 {
        pid_t   l_pid;
 } __attribute__ ((packed,aligned(4)));
 
-static long do_locks(unsigned int fd, unsigned int cmd,
-                                unsigned long arg)
+static int get_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg)
 {
-       struct flock64 kernel;
        struct oabi_flock64 user;
-       mm_segment_t fs;
-       long ret;
 
        if (copy_from_user(&user, (struct oabi_flock64 __user *)arg,
                           sizeof(user)))
                return -EFAULT;
-       kernel.l_type   = user.l_type;
-       kernel.l_whence = user.l_whence;
-       kernel.l_start  = user.l_start;
-       kernel.l_len    = user.l_len;
-       kernel.l_pid    = user.l_pid;
-
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-       ret = sys_fcntl64(fd, cmd, (unsigned long)&kernel);
-       set_fs(fs);
-
-       if (!ret && (cmd == F_GETLK64 || cmd == F_OFD_GETLK)) {
-               user.l_type     = kernel.l_type;
-               user.l_whence   = kernel.l_whence;
-               user.l_start    = kernel.l_start;
-               user.l_len      = kernel.l_len;
-               user.l_pid      = kernel.l_pid;
-               if (copy_to_user((struct oabi_flock64 __user *)arg,
-                                &user, sizeof(user)))
-                       ret = -EFAULT;
-       }
-       return ret;
+
+       kernel->l_type   = user.l_type;
+       kernel->l_whence = user.l_whence;
+       kernel->l_start  = user.l_start;
+       kernel->l_len    = user.l_len;
+       kernel->l_pid    = user.l_pid;
+
+       return 0;
+}
+
+static int put_oabi_flock(struct flock64 *kernel, struct oabi_flock64 __user *arg)
+{
+       struct oabi_flock64 user;
+
+       user.l_type     = kernel->l_type;
+       user.l_whence   = kernel->l_whence;
+       user.l_start    = kernel->l_start;
+       user.l_len      = kernel->l_len;
+       user.l_pid      = kernel->l_pid;
+
+       if (copy_to_user((struct oabi_flock64 __user *)arg,
+                        &user, sizeof(user)))
+               return -EFAULT;
+
+       return 0;
 }
 
 asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd,
                                 unsigned long arg)
 {
+       void __user *argp = (void __user *)arg;
+       struct fd f = fdget_raw(fd);
+       struct flock64 flock;
+       long err = -EBADF;
+
+       if (!f.file)
+               goto out;
+
        switch (cmd) {
-       case F_OFD_GETLK:
-       case F_OFD_SETLK:
-       case F_OFD_SETLKW:
        case F_GETLK64:
+       case F_OFD_GETLK:
+               err = security_file_fcntl(f.file, cmd, arg);
+               if (err)
+                       break;
+               err = get_oabi_flock(&flock, argp);
+               if (err)
+                       break;
+               err = fcntl_getlk64(f.file, cmd, &flock);
+               if (!err)
+                      err = put_oabi_flock(&flock, argp);
+               break;
        case F_SETLK64:
        case F_SETLKW64:
-               return do_locks(fd, cmd, arg);
-
+       case F_OFD_SETLK:
+       case F_OFD_SETLKW:
+               err = security_file_fcntl(f.file, cmd, arg);
+               if (err)
+                       break;
+               err = get_oabi_flock(&flock, argp);
+               if (err)
+                       break;
+               err = fcntl_setlk64(fd, f.file, cmd, &flock);
+               break;
        default:
-               return sys_fcntl64(fd, cmd, arg);
+               err = sys_fcntl64(fd, cmd, arg);
+               break;
        }
+       fdput(f);
+out:
+       return err;
 }
 
 struct oabi_epoll_event {
-       __u32 events;
+       __poll_t events;
        __u64 data;
 } __attribute__ ((packed,aligned(4)));
 
@@ -264,55 +294,34 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
 
        return do_epoll_ctl(epfd, op, fd, &kernel, false);
 }
-
-asmlinkage long sys_oabi_epoll_wait(int epfd,
-                                   struct oabi_epoll_event __user *events,
-                                   int maxevents, int timeout)
-{
-       struct epoll_event *kbuf;
-       struct oabi_epoll_event e;
-       mm_segment_t fs;
-       long ret, err, i;
-
-       if (maxevents <= 0 ||
-                       maxevents > (INT_MAX/sizeof(*kbuf)) ||
-                       maxevents > (INT_MAX/sizeof(*events)))
-               return -EINVAL;
-       if (!access_ok(events, sizeof(*events) * maxevents))
-               return -EFAULT;
-       kbuf = kmalloc_array(maxevents, sizeof(*kbuf), GFP_KERNEL);
-       if (!kbuf)
-               return -ENOMEM;
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-       ret = sys_epoll_wait(epfd, kbuf, maxevents, timeout);
-       set_fs(fs);
-       err = 0;
-       for (i = 0; i < ret; i++) {
-               e.events = kbuf[i].events;
-               e.data = kbuf[i].data;
-               err = __copy_to_user(events, &e, sizeof(e));
-               if (err)
-                       break;
-               events++;
-       }
-       kfree(kbuf);
-       return err ? -EFAULT : ret;
-}
 #else
 asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd,
                                   struct oabi_epoll_event __user *event)
 {
        return -EINVAL;
 }
+#endif
 
-asmlinkage long sys_oabi_epoll_wait(int epfd,
-                                   struct oabi_epoll_event __user *events,
-                                   int maxevents, int timeout)
+struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+                struct epoll_event __user *uevent)
 {
-       return -EINVAL;
+       if (in_oabi_syscall()) {
+               struct oabi_epoll_event __user *oevent = (void __user *)uevent;
+
+               if (__put_user(revents, &oevent->events) ||
+                   __put_user(data, &oevent->data))
+                       return NULL;
+
+               return (void __user *)(oevent+1);
+       }
+
+       if (__put_user(revents, &uevent->events) ||
+           __put_user(data, &uevent->data))
+               return NULL;
+
+       return uevent+1;
 }
-#endif
 
 struct oabi_sembuf {
        unsigned short  sem_num;
@@ -321,46 +330,52 @@ struct oabi_sembuf {
        unsigned short  __pad;
 };
 
+#define sc_semopm     sem_ctls[2]
+
+#ifdef CONFIG_SYSVIPC
 asmlinkage long sys_oabi_semtimedop(int semid,
                                    struct oabi_sembuf __user *tsops,
                                    unsigned nsops,
                                    const struct old_timespec32 __user *timeout)
 {
+       struct ipc_namespace *ns;
        struct sembuf *sops;
-       struct old_timespec32 local_timeout;
        long err;
        int i;
 
+       ns = current->nsproxy->ipc_ns;
+       if (nsops > ns->sc_semopm)
+               return -E2BIG;
        if (nsops < 1 || nsops > SEMOPM)
                return -EINVAL;
-       if (!access_ok(tsops, sizeof(*tsops) * nsops))
-               return -EFAULT;
-       sops = kmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
+       sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
        if (!sops)
                return -ENOMEM;
        err = 0;
        for (i = 0; i < nsops; i++) {
                struct oabi_sembuf osb;
-               err |= __copy_from_user(&osb, tsops, sizeof(osb));
+               err |= copy_from_user(&osb, tsops, sizeof(osb));
                sops[i].sem_num = osb.sem_num;
                sops[i].sem_op = osb.sem_op;
                sops[i].sem_flg = osb.sem_flg;
                tsops++;
        }
-       if (timeout) {
-               /* copy this as well before changing domain protection */
-               err |= copy_from_user(&local_timeout, timeout, sizeof(*timeout));
-               timeout = &local_timeout;
-       }
        if (err) {
                err = -EFAULT;
-       } else {
-               mm_segment_t fs = get_fs();
-               set_fs(KERNEL_DS);
-               err = sys_semtimedop_time32(semid, sops, nsops, timeout);
-               set_fs(fs);
+               goto out;
        }
-       kfree(sops);
+
+       if (timeout) {
+               struct timespec64 ts;
+               err = get_old_timespec32(&ts, timeout);
+               if (err)
+                       goto out;
+               err = __do_semtimedop(semid, sops, nsops, &ts, ns);
+               goto out;
+       }
+       err = __do_semtimedop(semid, sops, nsops, NULL, ns);
+out:
+       kvfree(sops);
        return err;
 }
 
@@ -387,6 +402,27 @@ asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
                return sys_ipc(call, first, second, third, ptr, fifth);
        }
 }
+#else
+asmlinkage long sys_oabi_semtimedop(int semid,
+                                   struct oabi_sembuf __user *tsops,
+                                   unsigned nsops,
+                                   const struct old_timespec32 __user *timeout)
+{
+       return -ENOSYS;
+}
+
+asmlinkage long sys_oabi_semop(int semid, struct oabi_sembuf __user *tsops,
+                              unsigned nsops)
+{
+       return -ENOSYS;
+}
+
+asmlinkage int sys_oabi_ipc(uint call, int first, int second, int third,
+                           void __user *ptr, long fifth)
+{
+       return -ENOSYS;
+}
+#endif
 
 asmlinkage long sys_oabi_bind(int fd, struct sockaddr __user *addr, int addrlen)
 {
index e9b4f2b..4a7edc6 100644 (file)
@@ -122,17 +122,8 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
                     unsigned long top)
 {
        unsigned long first;
-       mm_segment_t fs;
        int i;
 
-       /*
-        * We need to switch to kernel mode so that we can use __get_user
-        * to safely read from kernel space.  Note that we now dump the
-        * code first, just in case the backtrace kills us.
-        */
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-
        printk("%s%s(0x%08lx to 0x%08lx)\n", lvl, str, bottom, top);
 
        for (first = bottom & ~31; first < top; first += 32) {
@@ -145,7 +136,7 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
                for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
                        if (p >= bottom && p < top) {
                                unsigned long val;
-                               if (__get_user(val, (unsigned long *)p) == 0)
+                               if (!get_kernel_nofault(val, (unsigned long *)p))
                                        sprintf(str + i * 9, " %08lx", val);
                                else
                                        sprintf(str + i * 9, " ????????");
@@ -153,11 +144,9 @@ static void dump_mem(const char *lvl, const char *str, unsigned long bottom,
                }
                printk("%s%04lx:%s\n", lvl, first & 0xffff, str);
        }
-
-       set_fs(fs);
 }
 
-static void __dump_instr(const char *lvl, struct pt_regs *regs)
+static void dump_instr(const char *lvl, struct pt_regs *regs)
 {
        unsigned long addr = instruction_pointer(regs);
        const int thumb = thumb_mode(regs);
@@ -173,10 +162,20 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
        for (i = -4; i < 1 + !!thumb; i++) {
                unsigned int val, bad;
 
-               if (thumb)
-                       bad = get_user(val, &((u16 *)addr)[i]);
-               else
-                       bad = get_user(val, &((u32 *)addr)[i]);
+               if (!user_mode(regs)) {
+                       if (thumb) {
+                               u16 val16;
+                               bad = get_kernel_nofault(val16, &((u16 *)addr)[i]);
+                               val = val16;
+                       } else {
+                               bad = get_kernel_nofault(val, &((u32 *)addr)[i]);
+                       }
+               } else {
+                       if (thumb)
+                               bad = get_user(val, &((u16 *)addr)[i]);
+                       else
+                               bad = get_user(val, &((u32 *)addr)[i]);
+               }
 
                if (!bad)
                        p += sprintf(p, i == 0 ? "(%0*x) " : "%0*x ",
@@ -189,20 +188,6 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
        printk("%sCode: %s\n", lvl, str);
 }
 
-static void dump_instr(const char *lvl, struct pt_regs *regs)
-{
-       mm_segment_t fs;
-
-       if (!user_mode(regs)) {
-               fs = get_fs();
-               set_fs(KERNEL_DS);
-               __dump_instr(lvl, regs);
-               set_fs(fs);
-       } else {
-               __dump_instr(lvl, regs);
-       }
-}
-
 #ifdef CONFIG_ARM_UNWIND
 static inline void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
                                  const char *loglvl)
index f8016e3..480a207 100644 (file)
 
 ENTRY(arm_copy_from_user)
 #ifdef CONFIG_CPU_SPECTRE
-       get_thread_info r3
-       ldr     r3, [r3, #TI_ADDR_LIMIT]
+       ldr     r3, =TASK_SIZE
        uaccess_mask_range_ptr r1, r2, r3, ip
 #endif
 
index ebfe4cb..842ea5e 100644 (file)
 ENTRY(__copy_to_user_std)
 WEAK(arm_copy_to_user)
 #ifdef CONFIG_CPU_SPECTRE
-       get_thread_info r3
-       ldr     r3, [r3, #TI_ADDR_LIMIT]
+       ldr     r3, =TASK_SIZE
        uaccess_mask_range_ptr r0, r2, r3, ip
 #endif
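
Both user-copy entry points previously loaded the per-thread addr_limit to clamp the user pointer for the Spectre mitigation; with set_fs() gone the bound is simply the constant TASK_SIZE. A rough C analogue of the semantics (the real uaccess_mask_range_ptr macro does this branchlessly in assembly; the helper name here is hypothetical and only illustrates the bound being enforced):

	/* Force any (ptr, size) range that is not entirely below TASK_SIZE
	 * to a NULL pointer before the copy loop runs. */
	static inline void __user *mask_user_range(const void __user *ptr,
						   unsigned long size)
	{
		unsigned long addr = (unsigned long)ptr;

		if (addr + size < addr || addr + size > TASK_SIZE)
			addr = 0;

		return (void __user *)addr;
	}
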
 
index 7e0a9b6..e842209 100644 (file)
 249    common  lookup_dcookie          sys_lookup_dcookie
 250    common  epoll_create            sys_epoll_create
 251    common  epoll_ctl               sys_epoll_ctl           sys_oabi_epoll_ctl
-252    common  epoll_wait              sys_epoll_wait          sys_oabi_epoll_wait
+252    common  epoll_wait              sys_epoll_wait
 253    common  remap_file_pages        sys_remap_file_pages
 # 254 for set_thread_area
 # 255 for get_thread_area
index 5af51ac..2bd90c5 100644 (file)
@@ -110,6 +110,7 @@ config S390
        select ARCH_STACKWALK
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+       select ARCH_SUPPORTS_HUGETLBFS
        select ARCH_SUPPORTS_NUMA_BALANCING
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_CMPXCHG_LOCKREF
index 11ffc7c..37b6115 100644 (file)
@@ -804,6 +804,7 @@ CONFIG_DEBUG_VM_PGFLAGS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
 CONFIG_DEBUG_PER_CPU_MAPS=y
+CONFIG_KFENCE=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_PANIC_ON_OOPS=y
 CONFIG_DETECT_HUNG_TASK=y
index e1642d2..56a1cc8 100644 (file)
@@ -397,7 +397,6 @@ CONFIG_BLK_DEV_DRBD=m
 CONFIG_BLK_DEV_NBD=m
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=32768
-# CONFIG_BLK_DEV_XPRAM is not set
 CONFIG_VIRTIO_BLK=y
 CONFIG_BLK_DEV_RBD=m
 CONFIG_BLK_DEV_NVME=m
index d576aaa..aceccf3 100644 (file)
@@ -35,7 +35,6 @@ CONFIG_NET=y
 # CONFIG_ETHTOOL_NETLINK is not set
 CONFIG_DEVTMPFS=y
 CONFIG_BLK_DEV_RAM=y
-# CONFIG_BLK_DEV_XPRAM is not set
 # CONFIG_DCSSBLK is not set
 # CONFIG_DASD is not set
 CONFIG_ENCLOSURE_SERVICES=y
index ca0e0e5..f87a478 100644 (file)
@@ -24,13 +24,6 @@ enum cpumf_ctr_set {
 
 #define CPUMF_LCCTL_ENABLE_SHIFT    16
 #define CPUMF_LCCTL_ACTCTL_SHIFT     0
-static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
-       [CPUMF_CTR_SET_BASIC]   = 0x02,
-       [CPUMF_CTR_SET_USER]    = 0x04,
-       [CPUMF_CTR_SET_CRYPTO]  = 0x08,
-       [CPUMF_CTR_SET_EXT]     = 0x01,
-       [CPUMF_CTR_SET_MT_DIAG] = 0x20,
-};
 
 static inline void ctr_set_enable(u64 *state, u64 ctrsets)
 {
index e317fd4..f16f4d0 100644 (file)
@@ -18,6 +18,7 @@ extern struct mutex smp_cpu_state_mutex;
 extern unsigned int smp_cpu_mt_shift;
 extern unsigned int smp_cpu_mtid;
 extern __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
+extern cpumask_t cpu_setup_mask;
 
 extern int __cpu_up(unsigned int cpu, struct task_struct *tidle);
 
index 3d8a4b9..dd00d98 100644 (file)
@@ -34,16 +34,6 @@ static inline bool on_stack(struct stack_info *info,
        return addr >= info->begin && addr + len <= info->end;
 }
 
-static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
-                                                      struct pt_regs *regs)
-{
-       if (regs)
-               return (unsigned long) kernel_stack_pointer(regs);
-       if (task == current)
-               return current_stack_pointer();
-       return (unsigned long) task->thread.ksp;
-}
-
 /*
  * Stack layout of a C stack frame.
  */
@@ -74,6 +64,16 @@ struct stack_frame {
        ((unsigned long)__builtin_frame_address(0) -                    \
         offsetof(struct stack_frame, back_chain))
 
+static __always_inline unsigned long get_stack_pointer(struct task_struct *task,
+                                                      struct pt_regs *regs)
+{
+       if (regs)
+               return (unsigned long)kernel_stack_pointer(regs);
+       if (task == current)
+               return current_frame_address();
+       return (unsigned long)task->thread.ksp;
+}
+
 /*
  * To keep this simple mark register 2-6 as being changed (volatile)
  * by the called function, even though register 6 is saved/nonvolatile.
index de9006b..5ebf534 100644 (file)
@@ -55,10 +55,10 @@ static inline bool unwind_error(struct unwind_state *state)
        return state->error;
 }
 
-static inline void unwind_start(struct unwind_state *state,
-                               struct task_struct *task,
-                               struct pt_regs *regs,
-                               unsigned long first_frame)
+static __always_inline void unwind_start(struct unwind_state *state,
+                                        struct task_struct *task,
+                                        struct pt_regs *regs,
+                                        unsigned long first_frame)
 {
        task = task ?: current;
        first_frame = first_frame ?: get_stack_pointer(task, regs);
index b9716a7..4c9b967 100644 (file)
@@ -140,10 +140,10 @@ _LPP_OFFSET       = __LC_LPP
        TSTMSK  __LC_MCCK_CODE,(MCCK_CODE_STG_ERROR|MCCK_CODE_STG_KEY_ERROR)
        jnz     \errlabel
        TSTMSK  __LC_MCCK_CODE,MCCK_CODE_STG_DEGRAD
-       jz      oklabel\@
+       jz      .Loklabel\@
        TSTMSK  __LC_MCCK_CODE,MCCK_CODE_STG_FAIL_ADDR
        jnz     \errlabel
-oklabel\@:
+.Loklabel\@:
        .endm
 
 #if IS_ENABLED(CONFIG_KVM)
index 0a464d3..1d94ffd 100644 (file)
@@ -341,13 +341,13 @@ NOKPROBE_SYMBOL(prepare_ftrace_return);
  */
 int ftrace_enable_ftrace_graph_caller(void)
 {
-       brcl_disable(__va(ftrace_graph_caller));
+       brcl_disable(ftrace_graph_caller);
        return 0;
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-       brcl_enable(__va(ftrace_graph_caller));
+       brcl_enable(ftrace_graph_caller);
        return 0;
 }
 
index 2e3bb63..4a99154 100644 (file)
@@ -158,6 +158,14 @@ static size_t cfdiag_getctrset(struct cf_ctrset_entry *ctrdata, int ctrset,
        return need;
 }
 
+static const u64 cpumf_ctr_ctl[CPUMF_CTR_SET_MAX] = {
+       [CPUMF_CTR_SET_BASIC]   = 0x02,
+       [CPUMF_CTR_SET_USER]    = 0x04,
+       [CPUMF_CTR_SET_CRYPTO]  = 0x08,
+       [CPUMF_CTR_SET_EXT]     = 0x01,
+       [CPUMF_CTR_SET_MT_DIAG] = 0x20,
+};
+
 /* Read out all counter sets and save them in the provided data buffer.
  * The last 64 byte host an artificial trailer entry.
  */
index 5a01872..67e5fff 100644 (file)
@@ -50,6 +50,7 @@
 #include <linux/compat.h>
 #include <linux/start_kernel.h>
 #include <linux/hugetlb.h>
+#include <linux/kmemleak.h>
 
 #include <asm/boot_data.h>
 #include <asm/ipl.h>
@@ -356,9 +357,12 @@ void *restart_stack;
 unsigned long stack_alloc(void)
 {
 #ifdef CONFIG_VMAP_STACK
-       return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE,
-                       THREADINFO_GFP, NUMA_NO_NODE,
-                       __builtin_return_address(0));
+       void *ret;
+
+       ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
+                            NUMA_NO_NODE, __builtin_return_address(0));
+       kmemleak_not_leak(ret);
+       return (unsigned long)ret;
 #else
        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
 #endif
index 2a991e4..1a04e5b 100644 (file)
@@ -95,6 +95,7 @@ __vector128 __initdata boot_cpu_vector_save_area[__NUM_VXRS];
 #endif
 
 static unsigned int smp_max_threads __initdata = -1U;
+cpumask_t cpu_setup_mask;
 
 static int __init early_nosmt(char *s)
 {
@@ -902,13 +903,14 @@ static void smp_start_secondary(void *cpuvoid)
        vtime_init();
        vdso_getcpu_init();
        pfault_init();
+       cpumask_set_cpu(cpu, &cpu_setup_mask);
+       update_cpu_masks();
        notify_cpu_starting(cpu);
        if (topology_cpu_dedicated(cpu))
                set_cpu_flag(CIF_DEDICATED_CPU);
        else
                clear_cpu_flag(CIF_DEDICATED_CPU);
        set_cpu_online(cpu, true);
-       update_cpu_masks();
        inc_irq_stat(CPU_RST);
        local_irq_enable();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
@@ -950,10 +952,13 @@ early_param("possible_cpus", _setup_possible_cpus);
 int __cpu_disable(void)
 {
        unsigned long cregs[16];
+       int cpu;
 
        /* Handle possible pending IPIs */
        smp_handle_ext_call();
-       set_cpu_online(smp_processor_id(), false);
+       cpu = smp_processor_id();
+       set_cpu_online(cpu, false);
+       cpumask_clear_cpu(cpu, &cpu_setup_mask);
        update_cpu_masks();
        /* Disable pseudo page faults on this cpu. */
        pfault_fini();
index d2458a2..58f8291 100644 (file)
@@ -67,7 +67,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
        static cpumask_t mask;
 
        cpumask_clear(&mask);
-       if (!cpu_online(cpu))
+       if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
                goto out;
        cpumask_set_cpu(cpu, &mask);
        switch (topology_mode) {
@@ -88,7 +88,7 @@ static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int c
        case TOPOLOGY_MODE_SINGLE:
                break;
        }
-       cpumask_and(&mask, &mask, cpu_online_mask);
+       cpumask_and(&mask, &mask, &cpu_setup_mask);
 out:
        cpumask_copy(dst, &mask);
 }
@@ -99,16 +99,16 @@ static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
        int i;
 
        cpumask_clear(&mask);
-       if (!cpu_online(cpu))
+       if (!cpumask_test_cpu(cpu, &cpu_setup_mask))
                goto out;
        cpumask_set_cpu(cpu, &mask);
        if (topology_mode != TOPOLOGY_MODE_HW)
                goto out;
        cpu -= cpu % (smp_cpu_mtid + 1);
-       for (i = 0; i <= smp_cpu_mtid; i++)
-               if (cpu_present(cpu + i))
+       for (i = 0; i <= smp_cpu_mtid; i++) {
+               if (cpumask_test_cpu(cpu + i, &cpu_setup_mask))
                        cpumask_set_cpu(cpu + i, &mask);
-       cpumask_and(&mask, &mask, cpu_online_mask);
+       }
 out:
        cpumask_copy(dst, &mask);
 }
@@ -569,6 +569,7 @@ void __init topology_init_early(void)
        alloc_masks(info, &book_info, 2);
        alloc_masks(info, &drawer_info, 3);
 out:
+       cpumask_set_cpu(0, &cpu_setup_mask);
        __arch_update_cpu_topology();
        __arch_update_dedicated_flag(NULL);
 }
index 9bb2c75..4d3b33c 100644 (file)
@@ -27,7 +27,6 @@
 
 /**
  * gmap_alloc - allocate and initialize a guest address space
- * @mm: pointer to the parent mm_struct
  * @limit: maximum address of the gmap address space
  *
  * Returns a guest address space structure.
@@ -504,7 +503,7 @@ EXPORT_SYMBOL_GPL(gmap_translate);
 
 /**
  * gmap_unlink - disconnect a page table from the gmap shadow tables
- * @gmap: pointer to guest mapping meta data structure
+ * @mm: pointer to the parent mm_struct
  * @table: pointer to the host page table
  * @vmaddr: vm address associated with the host page table
  */
@@ -527,7 +526,7 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *old, pmd_t new,
                           unsigned long gaddr);
 
 /**
- * gmap_link - set up shadow page tables to connect a host to a guest address
+ * __gmap_link - set up shadow page tables to connect a host to a guest address
  * @gmap: pointer to guest mapping meta data structure
  * @gaddr: guest address
  * @vmaddr: vm address
@@ -1971,7 +1970,7 @@ out_free:
 EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
 
 /**
- * gmap_shadow_lookup_pgtable - find a shadow page table
+ * gmap_shadow_pgt_lookup - find a shadow page table
  * @sg: pointer to the shadow guest address space structure
  * @saddr: the address in the shadow aguest address space
  * @pgt: parent gmap address of the page table to get shadowed
@@ -2165,7 +2164,7 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_page);
 
-/**
+/*
  * gmap_shadow_notify - handle notifications for shadow gmap
  *
  * Called with sg->parent->shadow_lock.
@@ -2225,7 +2224,7 @@ static void gmap_shadow_notify(struct gmap *sg, unsigned long vmaddr,
 /**
  * ptep_notify - call all invalidation callbacks for a specific pte.
  * @mm: pointer to the process mm_struct
- * @addr: virtual address in the process address space
+ * @vmaddr: virtual address in the process address space
  * @pte: pointer to the page table entry
  * @bits: bits from the pgste that caused the notify call
  *
index eec3a9d..034721a 100644 (file)
@@ -834,7 +834,7 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(set_guest_storage_key);
 
-/**
+/*
  * Conditionally set a guest storage key (handling csske).
  * oldkey will be updated when either mr or mc is set and a pointer is given.
  *
@@ -867,7 +867,7 @@ int cond_set_guest_storage_key(struct mm_struct *mm, unsigned long addr,
 }
 EXPORT_SYMBOL(cond_set_guest_storage_key);
 
-/**
+/*
  * Reset a guest reference bit (rrbe), returning the reference and changed bit.
  *
  * Returns < 0 in case of error, otherwise the cc to be reported to the guest.
index 51dc221..be077b3 100644 (file)
@@ -383,8 +383,8 @@ static int clp_find_pci(struct clp_req_rsp_list_pci *rrb, u32 fid,
                rc = clp_list_pci_req(rrb, &resume_token, &nentries);
                if (rc)
                        return rc;
+               fh_list = rrb->response.fh_list;
                for (i = 0; i < nentries; i++) {
-                       fh_list = rrb->response.fh_list;
                        if (fh_list[i].fid == fid) {
                                *entry = fh_list[i];
                                return 0;
@@ -449,14 +449,17 @@ int clp_get_state(u32 fid, enum zpci_state *state)
        struct clp_fh_list_entry entry;
        int rc;
 
-       *state = ZPCI_FN_STATE_RESERVED;
        rrb = clp_alloc_block(GFP_ATOMIC);
        if (!rrb)
                return -ENOMEM;
 
        rc = clp_find_pci(rrb, fid, &entry);
-       if (!rc)
+       if (!rc) {
                *state = entry.config_state;
+       } else if (rc == -ENODEV) {
+               *state = ZPCI_FN_STATE_RESERVED;
+               rc = 0;
+       }
 
        clp_free_block(rrb);
        return rc;
index 8de2646..c18b45f 100644 (file)
@@ -24,6 +24,7 @@ config UML
        select SET_FS
        select TRACE_IRQFLAGS_SUPPORT
        select TTY # Needed for line.c
+       select HAVE_ARCH_VMAP_STACK
 
 config MMU
        bool
index 0b80283..c080666 100644 (file)
@@ -56,6 +56,13 @@ static unsigned long um_pci_msi_used[BITS_TO_LONGS(MAX_MSI_VECTORS)];
 
 #define UM_VIRT_PCI_MAXDELAY 40000
 
+struct um_pci_message_buffer {
+       struct virtio_pcidev_msg hdr;
+       u8 data[8];
+};
+
+static struct um_pci_message_buffer __percpu *um_pci_msg_bufs;
+
 static int um_pci_send_cmd(struct um_pci_device *dev,
                           struct virtio_pcidev_msg *cmd,
                           unsigned int cmd_size,
@@ -68,11 +75,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                [1] = extra ? &extra_sg : &in_sg,
                [2] = extra ? &in_sg : NULL,
        };
+       struct um_pci_message_buffer *buf;
        int delay_count = 0;
        int ret, len;
        bool posted;
 
-       if (WARN_ON(cmd_size < sizeof(*cmd)))
+       if (WARN_ON(cmd_size < sizeof(*cmd) || cmd_size > sizeof(*buf)))
                return -EINVAL;
 
        switch (cmd->op) {
@@ -88,6 +96,9 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                break;
        }
 
+       buf = get_cpu_var(um_pci_msg_bufs);
+       memcpy(buf, cmd, cmd_size);
+
        if (posted) {
                u8 *ncmd = kmalloc(cmd_size + extra_size, GFP_ATOMIC);
 
@@ -102,7 +113,10 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                } else {
                        /* try without allocating memory */
                        posted = false;
+                       cmd = (void *)buf;
                }
+       } else {
+               cmd = (void *)buf;
        }
 
        sg_init_one(&out_sg, cmd, cmd_size);
@@ -118,11 +132,12 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
                                posted ? cmd : HANDLE_NO_FREE(cmd),
                                GFP_ATOMIC);
        if (ret)
-               return ret;
+               goto out;
 
        if (posted) {
                virtqueue_kick(dev->cmd_vq);
-               return 0;
+               ret = 0;
+               goto out;
        }
 
        /* kick and poll for getting a response on the queue */
@@ -148,6 +163,8 @@ static int um_pci_send_cmd(struct um_pci_device *dev,
        }
        clear_bit(UM_PCI_STAT_WAITING, &dev->status);
 
+out:
+       put_cpu_var(um_pci_msg_bufs);
        return ret;
 }
 
@@ -161,12 +178,17 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
                .size = size,
                .addr = offset,
        };
-       /* maximum size - we may only use parts of it */
-       u8 data[8];
+       /* buf->data is maximum size - we may only use parts of it */
+       struct um_pci_message_buffer *buf;
+       u8 *data;
+       unsigned long ret = ~0ULL;
 
        if (!dev)
                return ~0ULL;
 
+       buf = get_cpu_var(um_pci_msg_bufs);
+       data = buf->data;
+
        memset(data, 0xff, sizeof(data));
 
        switch (size) {
@@ -179,27 +201,34 @@ static unsigned long um_pci_cfgspace_read(void *priv, unsigned int offset,
                break;
        default:
                WARN(1, "invalid config space read size %d\n", size);
-               return ~0ULL;
+               goto out;
        }
 
-       if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0,
-                           data, sizeof(data)))
-               return ~0ULL;
+       if (um_pci_send_cmd(dev, &hdr, sizeof(hdr), NULL, 0, data, 8))
+               goto out;
 
        switch (size) {
        case 1:
-               return data[0];
+               ret = data[0];
+               break;
        case 2:
-               return le16_to_cpup((void *)data);
+               ret = le16_to_cpup((void *)data);
+               break;
        case 4:
-               return le32_to_cpup((void *)data);
+               ret = le32_to_cpup((void *)data);
+               break;
 #ifdef CONFIG_64BIT
        case 8:
-               return le64_to_cpup((void *)data);
+               ret = le64_to_cpup((void *)data);
+               break;
 #endif
        default:
-               return ~0ULL;
+               break;
        }
+
+out:
+       put_cpu_var(um_pci_msg_bufs);
+       return ret;
 }
 
 static void um_pci_cfgspace_write(void *priv, unsigned int offset, int size,
@@ -272,8 +301,13 @@ static void um_pci_bar_copy_from(void *priv, void *buffer,
 static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
                                     int size)
 {
-       /* maximum size - we may only use parts of it */
-       u8 data[8];
+       /* buf->data is maximum size - we may only use parts of it */
+       struct um_pci_message_buffer *buf;
+       u8 *data;
+       unsigned long ret = ~0ULL;
+
+       buf = get_cpu_var(um_pci_msg_bufs);
+       data = buf->data;
 
        switch (size) {
        case 1:
@@ -285,25 +319,33 @@ static unsigned long um_pci_bar_read(void *priv, unsigned int offset,
                break;
        default:
                WARN(1, "invalid config space read size %d\n", size);
-               return ~0ULL;
+               goto out;
        }
 
        um_pci_bar_copy_from(priv, data, offset, size);
 
        switch (size) {
        case 1:
-               return data[0];
+               ret = data[0];
+               break;
        case 2:
-               return le16_to_cpup((void *)data);
+               ret = le16_to_cpup((void *)data);
+               break;
        case 4:
-               return le32_to_cpup((void *)data);
+               ret = le32_to_cpup((void *)data);
+               break;
 #ifdef CONFIG_64BIT
        case 8:
-               return le64_to_cpup((void *)data);
+               ret = le64_to_cpup((void *)data);
+               break;
 #endif
        default:
-               return ~0ULL;
+               break;
        }
+
+out:
+       put_cpu_var(um_pci_msg_bufs);
+       return ret;
 }
 
 static void um_pci_bar_copy_to(void *priv, unsigned int offset,
@@ -810,7 +852,7 @@ void *pci_root_bus_fwnode(struct pci_bus *bus)
        return um_pci_fwnode;
 }
 
-int um_pci_init(void)
+static int um_pci_init(void)
 {
        int err, i;
 
@@ -823,10 +865,16 @@ int um_pci_init(void)
                 "No virtio device ID configured for PCI - no PCI support\n"))
                return 0;
 
-       bridge = pci_alloc_host_bridge(0);
-       if (!bridge)
+       um_pci_msg_bufs = alloc_percpu(struct um_pci_message_buffer);
+       if (!um_pci_msg_bufs)
                return -ENOMEM;
 
+       bridge = pci_alloc_host_bridge(0);
+       if (!bridge) {
+               err = -ENOMEM;
+               goto free;
+       }
+
        um_pci_fwnode = irq_domain_alloc_named_fwnode("um-pci");
        if (!um_pci_fwnode) {
                err = -ENOMEM;
@@ -878,18 +926,22 @@ free:
                irq_domain_remove(um_pci_inner_domain);
        if (um_pci_fwnode)
                irq_domain_free_fwnode(um_pci_fwnode);
-       pci_free_resource_list(&bridge->windows);
-       pci_free_host_bridge(bridge);
+       if (bridge) {
+               pci_free_resource_list(&bridge->windows);
+               pci_free_host_bridge(bridge);
+       }
+       free_percpu(um_pci_msg_bufs);
        return err;
 }
 module_init(um_pci_init);
 
-void um_pci_exit(void)
+static void um_pci_exit(void)
 {
        unregister_virtio_driver(&um_pci_virtio_driver);
        irq_domain_remove(um_pci_msi_domain);
        irq_domain_remove(um_pci_inner_domain);
        pci_free_resource_list(&bridge->windows);
        pci_free_host_bridge(bridge);
+       free_percpu(um_pci_msg_bufs);
 }
 module_exit(um_pci_exit);
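
um_pci_send_cmd() and the config-space/BAR readers above now bounce data through a per-CPU um_pci_message_buffer instead of on-stack arrays, which keeps the virtio scatterlists pointing at directly-mapped memory once UML stacks can live in vmalloc space. The get_cpu_var()/put_cpu_var() pattern used there, shown in isolation with hypothetical names:

	#include <linux/kernel.h>
	#include <linux/percpu.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct my_msg {
		u8 data[8];
	};

	/* my_msg_buf is a stand-in for um_pci_msg_bufs */
	static DEFINE_PER_CPU(struct my_msg, my_msg_buf);

	static void send_through_percpu(const void *src, size_t len)
	{
		/* get_cpu_var() disables preemption and returns this CPU's copy */
		struct my_msg *buf = &get_cpu_var(my_msg_buf);

		memcpy(buf->data, src, min(len, sizeof(buf->data)));
		/* ... hand buf->data to code that needs a non-stack buffer ... */

		put_cpu_var(my_msg_buf);	/* re-enable preemption */
	}
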
index 4412d6f..d51e445 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/time-internal.h>
+#include <linux/virtio-uml.h>
 #include <shared/as-layout.h>
 #include <irq_kern.h>
 #include <init.h>
@@ -1139,7 +1140,7 @@ static int virtio_uml_probe(struct platform_device *pdev)
                rc = os_connect_socket(pdata->socket_path);
        } while (rc == -EINTR);
        if (rc < 0)
-               return rc;
+               goto error_free;
        vu_dev->sock = rc;
 
        spin_lock_init(&vu_dev->sock_lock);
@@ -1160,6 +1161,8 @@ static int virtio_uml_probe(struct platform_device *pdev)
 
 error_init:
        os_close_file(vu_dev->sock);
+error_free:
+       kfree(vu_dev);
        return rc;
 }
 
index 5afac0f..ff5061f 100644 (file)
@@ -24,8 +24,7 @@
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_clone_handler(void)
 {
-       int stack;
-       struct stub_data *data = (void *) ((unsigned long)&stack & ~(UM_KERN_PAGE_SIZE - 1));
+       struct stub_data *data = get_stub_page();
        long err;
 
        err = stub_syscall2(__NR_clone, CLONE_PARENT | CLONE_FILES | SIGCHLD,
index b95db9d..4c6c2be 100644 (file)
@@ -101,4 +101,16 @@ static inline void remap_stack_and_trap(void)
                "memory");
 }
 
+static __always_inline void *get_stub_page(void)
+{
+       unsigned long ret;
+
+       asm volatile (
+               "movl %%esp,%0 ;"
+               "andl %1,%0"
+               : "=a" (ret)
+               : "g" (~(UM_KERN_PAGE_SIZE - 1)));
+
+       return (void *)ret;
+}
 #endif
index 6e2626b..e9c4b2b 100644 (file)
@@ -108,4 +108,16 @@ static inline void remap_stack_and_trap(void)
                __syscall_clobber, "r10", "r8", "r9");
 }
 
+static __always_inline void *get_stub_page(void)
+{
+       unsigned long ret;
+
+       asm volatile (
+               "movq %%rsp,%0 ;"
+               "andq %1,%0"
+               : "=a" (ret)
+               : "g" (~(UM_KERN_PAGE_SIZE - 1)));
+
+       return (void *)ret;
+}
 #endif
index 21836ea..f7eefba 100644 (file)
@@ -11,9 +11,8 @@
 void __attribute__ ((__section__ (".__syscall_stub")))
 stub_segv_handler(int sig, siginfo_t *info, void *p)
 {
-       int stack;
+       struct faultinfo *f = get_stub_page();
        ucontext_t *uc = p;
-       struct faultinfo *f = (void *)(((unsigned long)&stack) & ~(UM_KERN_PAGE_SIZE - 1));
 
        GET_FAULTINFO_FROM_MC(*f, &uc->uc_mcontext);
        trap_myself();
index 3295405..d1aaabc 100644 (file)
@@ -1,11 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_CXL_BUS) += cxl_core.o
+obj-$(CONFIG_CXL_BUS) += core/
 obj-$(CONFIG_CXL_MEM) += cxl_pci.o
 obj-$(CONFIG_CXL_ACPI) += cxl_acpi.o
 obj-$(CONFIG_CXL_PMEM) += cxl_pmem.o
 
-ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL
-cxl_core-y := core.o
 cxl_pci-y := pci.o
 cxl_acpi-y := acpi.o
 cxl_pmem-y := pmem.o
index 8ae8927..54e9d4d 100644 (file)
@@ -243,6 +243,9 @@ static struct acpi_device *to_cxl_host_bridge(struct device *dev)
 {
        struct acpi_device *adev = to_acpi_device(dev);
 
+       if (!acpi_pci_find_root(adev->handle))
+               return NULL;
+
        if (strcmp(acpi_device_hid(adev), "ACPI0016") == 0)
                return adev;
        return NULL;
@@ -266,10 +269,6 @@ static int add_host_bridge_uport(struct device *match, void *arg)
        if (!bridge)
                return 0;
 
-       pci_root = acpi_pci_find_root(bridge->handle);
-       if (!pci_root)
-               return -ENXIO;
-
        dport = find_dport_by_dev(root_port, match);
        if (!dport) {
                dev_dbg(host, "host bridge expected and not found\n");
@@ -282,6 +281,11 @@ static int add_host_bridge_uport(struct device *match, void *arg)
                return PTR_ERR(port);
        dev_dbg(host, "%s: add: %s\n", dev_name(match), dev_name(&port->dev));
 
+       /*
+        * Note that this lookup already succeeded in
+        * to_cxl_host_bridge(), so no need to check for failure here
+        */
+       pci_root = acpi_pci_find_root(bridge->handle);
        ctx = (struct cxl_walk_context){
                .dev = host,
                .root = pci_root->bus,
diff --git a/drivers/cxl/core.c b/drivers/cxl/core.c
deleted file mode 100644 (file)
index 2b90b7c..0000000
+++ /dev/null
@@ -1,1066 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
-#include <linux/io-64-nonatomic-lo-hi.h>
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/pci.h>
-#include <linux/slab.h>
-#include <linux/idr.h>
-#include "cxl.h"
-#include "mem.h"
-
-/**
- * DOC: cxl core
- *
- * The CXL core provides a sysfs hierarchy for control devices and a rendezvous
- * point for cross-device interleave coordination through cxl ports.
- */
-
-static DEFINE_IDA(cxl_port_ida);
-
-static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
-                           char *buf)
-{
-       return sysfs_emit(buf, "%s\n", dev->type->name);
-}
-static DEVICE_ATTR_RO(devtype);
-
-static struct attribute *cxl_base_attributes[] = {
-       &dev_attr_devtype.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_base_attribute_group = {
-       .attrs = cxl_base_attributes,
-};
-
-static ssize_t start_show(struct device *dev, struct device_attribute *attr,
-                         char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       return sysfs_emit(buf, "%#llx\n", cxld->range.start);
-}
-static DEVICE_ATTR_RO(start);
-
-static ssize_t size_show(struct device *dev, struct device_attribute *attr,
-                       char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
-}
-static DEVICE_ATTR_RO(size);
-
-#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
-static ssize_t name##_show(struct device *dev,                       \
-                          struct device_attribute *attr, char *buf) \
-{                                                                    \
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
-                                                                     \
-       return sysfs_emit(buf, "%s\n",                               \
-                         (cxld->flags & (flag)) ? "1" : "0");       \
-}                                                                    \
-static DEVICE_ATTR_RO(name)
-
-CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
-CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
-CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
-CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
-CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
-
-static ssize_t target_type_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-
-       switch (cxld->target_type) {
-       case CXL_DECODER_ACCELERATOR:
-               return sysfs_emit(buf, "accelerator\n");
-       case CXL_DECODER_EXPANDER:
-               return sysfs_emit(buf, "expander\n");
-       }
-       return -ENXIO;
-}
-static DEVICE_ATTR_RO(target_type);
-
-static ssize_t target_list_show(struct device *dev,
-                              struct device_attribute *attr, char *buf)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-       ssize_t offset = 0;
-       int i, rc = 0;
-
-       device_lock(dev);
-       for (i = 0; i < cxld->interleave_ways; i++) {
-               struct cxl_dport *dport = cxld->target[i];
-               struct cxl_dport *next = NULL;
-
-               if (!dport)
-                       break;
-
-               if (i + 1 < cxld->interleave_ways)
-                       next = cxld->target[i + 1];
-               rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
-                                  next ? "," : "");
-               if (rc < 0)
-                       break;
-               offset += rc;
-       }
-       device_unlock(dev);
-
-       if (rc < 0)
-               return rc;
-
-       rc = sysfs_emit_at(buf, offset, "\n");
-       if (rc < 0)
-               return rc;
-
-       return offset + rc;
-}
-static DEVICE_ATTR_RO(target_list);
-
-static struct attribute *cxl_decoder_base_attrs[] = {
-       &dev_attr_start.attr,
-       &dev_attr_size.attr,
-       &dev_attr_locked.attr,
-       &dev_attr_target_list.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_base_attribute_group = {
-       .attrs = cxl_decoder_base_attrs,
-};
-
-static struct attribute *cxl_decoder_root_attrs[] = {
-       &dev_attr_cap_pmem.attr,
-       &dev_attr_cap_ram.attr,
-       &dev_attr_cap_type2.attr,
-       &dev_attr_cap_type3.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_root_attribute_group = {
-       .attrs = cxl_decoder_root_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
-       &cxl_decoder_root_attribute_group,
-       &cxl_decoder_base_attribute_group,
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static struct attribute *cxl_decoder_switch_attrs[] = {
-       &dev_attr_target_type.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_decoder_switch_attribute_group = {
-       .attrs = cxl_decoder_switch_attrs,
-};
-
-static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
-       &cxl_decoder_switch_attribute_group,
-       &cxl_decoder_base_attribute_group,
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static void cxl_decoder_release(struct device *dev)
-{
-       struct cxl_decoder *cxld = to_cxl_decoder(dev);
-       struct cxl_port *port = to_cxl_port(dev->parent);
-
-       ida_free(&port->decoder_ida, cxld->id);
-       kfree(cxld);
-}
-
-static const struct device_type cxl_decoder_switch_type = {
-       .name = "cxl_decoder_switch",
-       .release = cxl_decoder_release,
-       .groups = cxl_decoder_switch_attribute_groups,
-};
-
-static const struct device_type cxl_decoder_root_type = {
-       .name = "cxl_decoder_root",
-       .release = cxl_decoder_release,
-       .groups = cxl_decoder_root_attribute_groups,
-};
-
-bool is_root_decoder(struct device *dev)
-{
-       return dev->type == &cxl_decoder_root_type;
-}
-EXPORT_SYMBOL_GPL(is_root_decoder);
-
-struct cxl_decoder *to_cxl_decoder(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
-                         "not a cxl_decoder device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_decoder, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_decoder);
-
-static void cxl_dport_release(struct cxl_dport *dport)
-{
-       list_del(&dport->list);
-       put_device(dport->dport);
-       kfree(dport);
-}
-
-static void cxl_port_release(struct device *dev)
-{
-       struct cxl_port *port = to_cxl_port(dev);
-       struct cxl_dport *dport, *_d;
-
-       device_lock(dev);
-       list_for_each_entry_safe(dport, _d, &port->dports, list)
-               cxl_dport_release(dport);
-       device_unlock(dev);
-       ida_free(&cxl_port_ida, port->id);
-       kfree(port);
-}
-
-static const struct attribute_group *cxl_port_attribute_groups[] = {
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static const struct device_type cxl_port_type = {
-       .name = "cxl_port",
-       .release = cxl_port_release,
-       .groups = cxl_port_attribute_groups,
-};
-
-struct cxl_port *to_cxl_port(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
-                         "not a cxl_port device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_port, dev);
-}
-
-static void unregister_port(void *_port)
-{
-       struct cxl_port *port = _port;
-       struct cxl_dport *dport;
-
-       device_lock(&port->dev);
-       list_for_each_entry(dport, &port->dports, list) {
-               char link_name[CXL_TARGET_STRLEN];
-
-               if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
-                            dport->port_id) >= CXL_TARGET_STRLEN)
-                       continue;
-               sysfs_remove_link(&port->dev.kobj, link_name);
-       }
-       device_unlock(&port->dev);
-       device_unregister(&port->dev);
-}
-
-static void cxl_unlink_uport(void *_port)
-{
-       struct cxl_port *port = _port;
-
-       sysfs_remove_link(&port->dev.kobj, "uport");
-}
-
-static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
-{
-       int rc;
-
-       rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
-       if (rc)
-               return rc;
-       return devm_add_action_or_reset(host, cxl_unlink_uport, port);
-}
-
-static struct cxl_port *cxl_port_alloc(struct device *uport,
-                                      resource_size_t component_reg_phys,
-                                      struct cxl_port *parent_port)
-{
-       struct cxl_port *port;
-       struct device *dev;
-       int rc;
-
-       port = kzalloc(sizeof(*port), GFP_KERNEL);
-       if (!port)
-               return ERR_PTR(-ENOMEM);
-
-       rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
-       if (rc < 0)
-               goto err;
-       port->id = rc;
-
-       /*
-        * The top-level cxl_port "cxl_root" does not have a cxl_port as
-        * its parent and it does not have any corresponding component
-        * registers as its decode is described by a fixed platform
-        * description.
-        */
-       dev = &port->dev;
-       if (parent_port)
-               dev->parent = &parent_port->dev;
-       else
-               dev->parent = uport;
-
-       port->uport = uport;
-       port->component_reg_phys = component_reg_phys;
-       ida_init(&port->decoder_ida);
-       INIT_LIST_HEAD(&port->dports);
-
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->bus = &cxl_bus_type;
-       dev->type = &cxl_port_type;
-
-       return port;
-
-err:
-       kfree(port);
-       return ERR_PTR(rc);
-}
-
-/**
- * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
- * @host: host device for devm operations
- * @uport: "physical" device implementing this upstream port
- * @component_reg_phys: (optional) for configurable cxl_port instances
- * @parent_port: next hop up in the CXL memory decode hierarchy
- */
-struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
-                                  resource_size_t component_reg_phys,
-                                  struct cxl_port *parent_port)
-{
-       struct cxl_port *port;
-       struct device *dev;
-       int rc;
-
-       port = cxl_port_alloc(uport, component_reg_phys, parent_port);
-       if (IS_ERR(port))
-               return port;
-
-       dev = &port->dev;
-       if (parent_port)
-               rc = dev_set_name(dev, "port%d", port->id);
-       else
-               rc = dev_set_name(dev, "root%d", port->id);
-       if (rc)
-               goto err;
-
-       rc = device_add(dev);
-       if (rc)
-               goto err;
-
-       rc = devm_add_action_or_reset(host, unregister_port, port);
-       if (rc)
-               return ERR_PTR(rc);
-
-       rc = devm_cxl_link_uport(host, port);
-       if (rc)
-               return ERR_PTR(rc);
-
-       return port;
-
-err:
-       put_device(dev);
-       return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_port);
-
-static struct cxl_dport *find_dport(struct cxl_port *port, int id)
-{
-       struct cxl_dport *dport;
-
-       device_lock_assert(&port->dev);
-       list_for_each_entry (dport, &port->dports, list)
-               if (dport->port_id == id)
-                       return dport;
-       return NULL;
-}
-
-static int add_dport(struct cxl_port *port, struct cxl_dport *new)
-{
-       struct cxl_dport *dup;
-
-       device_lock(&port->dev);
-       dup = find_dport(port, new->port_id);
-       if (dup)
-               dev_err(&port->dev,
-                       "unable to add dport%d-%s non-unique port id (%s)\n",
-                       new->port_id, dev_name(new->dport),
-                       dev_name(dup->dport));
-       else
-               list_add_tail(&new->list, &port->dports);
-       device_unlock(&port->dev);
-
-       return dup ? -EEXIST : 0;
-}
-
-/**
- * cxl_add_dport - append downstream port data to a cxl_port
- * @port: the cxl_port that references this dport
- * @dport_dev: firmware or PCI device representing the dport
- * @port_id: identifier for this dport in a decoder's target list
- * @component_reg_phys: optional location of CXL component registers
- *
- * Note that all allocations and links are undone by cxl_port deletion
- * and release.
- */
-int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
-                 resource_size_t component_reg_phys)
-{
-       char link_name[CXL_TARGET_STRLEN];
-       struct cxl_dport *dport;
-       int rc;
-
-       if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
-           CXL_TARGET_STRLEN)
-               return -EINVAL;
-
-       dport = kzalloc(sizeof(*dport), GFP_KERNEL);
-       if (!dport)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&dport->list);
-       dport->dport = get_device(dport_dev);
-       dport->port_id = port_id;
-       dport->component_reg_phys = component_reg_phys;
-       dport->port = port;
-
-       rc = add_dport(port, dport);
-       if (rc)
-               goto err;
-
-       rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
-       if (rc)
-               goto err;
-
-       return 0;
-err:
-       cxl_dport_release(dport);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(cxl_add_dport);
-
-static struct cxl_decoder *
-cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base,
-                 resource_size_t len, int interleave_ways,
-                 int interleave_granularity, enum cxl_decoder_type type,
-                 unsigned long flags)
-{
-       struct cxl_decoder *cxld;
-       struct device *dev;
-       int rc = 0;
-
-       if (interleave_ways < 1)
-               return ERR_PTR(-EINVAL);
-
-       device_lock(&port->dev);
-       if (list_empty(&port->dports))
-               rc = -EINVAL;
-       device_unlock(&port->dev);
-       if (rc)
-               return ERR_PTR(rc);
-
-       cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
-       if (!cxld)
-               return ERR_PTR(-ENOMEM);
-
-       rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
-       if (rc < 0)
-               goto err;
-
-       *cxld = (struct cxl_decoder) {
-               .id = rc,
-               .range = {
-                       .start = base,
-                       .end = base + len - 1,
-               },
-               .flags = flags,
-               .interleave_ways = interleave_ways,
-               .interleave_granularity = interleave_granularity,
-               .target_type = type,
-       };
-
-       /* handle implied target_list */
-       if (interleave_ways == 1)
-               cxld->target[0] =
-                       list_first_entry(&port->dports, struct cxl_dport, list);
-       dev = &cxld->dev;
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->parent = &port->dev;
-       dev->bus = &cxl_bus_type;
-
-       /* root ports do not have a cxl_port_type parent */
-       if (port->dev.parent->type == &cxl_port_type)
-               dev->type = &cxl_decoder_switch_type;
-       else
-               dev->type = &cxl_decoder_root_type;
-
-       return cxld;
-err:
-       kfree(cxld);
-       return ERR_PTR(rc);
-}
-
-static void unregister_dev(void *dev)
-{
-       device_unregister(dev);
-}
-
-struct cxl_decoder *
-devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
-                    resource_size_t base, resource_size_t len,
-                    int interleave_ways, int interleave_granularity,
-                    enum cxl_decoder_type type, unsigned long flags)
-{
-       struct cxl_decoder *cxld;
-       struct device *dev;
-       int rc;
-
-       cxld = cxl_decoder_alloc(port, nr_targets, base, len, interleave_ways,
-                                interleave_granularity, type, flags);
-       if (IS_ERR(cxld))
-               return cxld;
-
-       dev = &cxld->dev;
-       rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
-       if (rc)
-               goto err;
-
-       rc = device_add(dev);
-       if (rc)
-               goto err;
-
-       rc = devm_add_action_or_reset(host, unregister_dev, dev);
-       if (rc)
-               return ERR_PTR(rc);
-       return cxld;
-
-err:
-       put_device(dev);
-       return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_decoder);
-
-/**
- * cxl_probe_component_regs() - Detect CXL Component register blocks
- * @dev: Host device of the @base mapping
- * @base: Mapping containing the HDM Decoder Capability Header
- * @map: Map object describing the register block information found
- *
- * See CXL 2.0 8.2.4 Component Register Layout and Definition
- * See CXL 2.0 8.2.5.5 CXL Device Register Interface
- *
- * Probe for component register information and return it in map object.
- */
-void cxl_probe_component_regs(struct device *dev, void __iomem *base,
-                             struct cxl_component_reg_map *map)
-{
-       int cap, cap_count;
-       u64 cap_array;
-
-       *map = (struct cxl_component_reg_map) { 0 };
-
-       /*
-        * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in
-        * CXL 2.0 8.2.4 Table 141.
-        */
-       base += CXL_CM_OFFSET;
-
-       cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
-
-       if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
-               dev_err(dev,
-                       "Couldn't locate the CXL.cache and CXL.mem capability array header.\n");
-               return;
-       }
-
-       /* It's assumed that future versions will be backward compatible */
-       cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_array);
-
-       for (cap = 1; cap <= cap_count; cap++) {
-               void __iomem *register_block;
-               u32 hdr;
-               int decoder_cnt;
-               u16 cap_id, offset;
-               u32 length;
-
-               hdr = readl(base + cap * 0x4);
-
-               cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
-               offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
-               register_block = base + offset;
-
-               switch (cap_id) {
-               case CXL_CM_CAP_CAP_ID_HDM:
-                       dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
-                               offset);
-
-                       hdr = readl(register_block);
-
-                       decoder_cnt = cxl_hdm_decoder_count(hdr);
-                       length = 0x20 * decoder_cnt + 0x10;
-
-                       map->hdm_decoder.valid = true;
-                       map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
-                       map->hdm_decoder.size = length;
-                       break;
-               default:
-                       dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
-                               offset);
-                       break;
-               }
-       }
-}
-EXPORT_SYMBOL_GPL(cxl_probe_component_regs);
-
-static void cxl_nvdimm_bridge_release(struct device *dev)
-{
-       struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
-
-       kfree(cxl_nvb);
-}
-
-static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = {
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static const struct device_type cxl_nvdimm_bridge_type = {
-       .name = "cxl_nvdimm_bridge",
-       .release = cxl_nvdimm_bridge_release,
-       .groups = cxl_nvdimm_bridge_attribute_groups,
-};
-
-struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type,
-                         "not a cxl_nvdimm_bridge device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_nvdimm_bridge, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
-
-static struct cxl_nvdimm_bridge *
-cxl_nvdimm_bridge_alloc(struct cxl_port *port)
-{
-       struct cxl_nvdimm_bridge *cxl_nvb;
-       struct device *dev;
-
-       cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
-       if (!cxl_nvb)
-               return ERR_PTR(-ENOMEM);
-
-       dev = &cxl_nvb->dev;
-       cxl_nvb->port = port;
-       cxl_nvb->state = CXL_NVB_NEW;
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->parent = &port->dev;
-       dev->bus = &cxl_bus_type;
-       dev->type = &cxl_nvdimm_bridge_type;
-
-       return cxl_nvb;
-}
-
-static void unregister_nvb(void *_cxl_nvb)
-{
-       struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
-       bool flush;
-
-       /*
-        * If the bridge was ever activated then there might be in-flight state
-        * work to flush. Once the state has been changed to 'dead' then no new
-        * work can be queued by user-triggered bind.
-        */
-       device_lock(&cxl_nvb->dev);
-       flush = cxl_nvb->state != CXL_NVB_NEW;
-       cxl_nvb->state = CXL_NVB_DEAD;
-       device_unlock(&cxl_nvb->dev);
-
-       /*
-        * Even though the device core will trigger device_release_driver()
-        * before the unregister, it does not know about the fact that
-        * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
-        * release now and flush it before tearing down the nvdimm device
-        * hierarchy.
-        */
-       device_release_driver(&cxl_nvb->dev);
-       if (flush)
-               flush_work(&cxl_nvb->state_work);
-       device_unregister(&cxl_nvb->dev);
-}
-
-struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
-                                                    struct cxl_port *port)
-{
-       struct cxl_nvdimm_bridge *cxl_nvb;
-       struct device *dev;
-       int rc;
-
-       if (!IS_ENABLED(CONFIG_CXL_PMEM))
-               return ERR_PTR(-ENXIO);
-
-       cxl_nvb = cxl_nvdimm_bridge_alloc(port);
-       if (IS_ERR(cxl_nvb))
-               return cxl_nvb;
-
-       dev = &cxl_nvb->dev;
-       rc = dev_set_name(dev, "nvdimm-bridge");
-       if (rc)
-               goto err;
-
-       rc = device_add(dev);
-       if (rc)
-               goto err;
-
-       rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
-       if (rc)
-               return ERR_PTR(rc);
-
-       return cxl_nvb;
-
-err:
-       put_device(dev);
-       return ERR_PTR(rc);
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge);
-
-static void cxl_nvdimm_release(struct device *dev)
-{
-       struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
-
-       kfree(cxl_nvd);
-}
-
-static const struct attribute_group *cxl_nvdimm_attribute_groups[] = {
-       &cxl_base_attribute_group,
-       NULL,
-};
-
-static const struct device_type cxl_nvdimm_type = {
-       .name = "cxl_nvdimm",
-       .release = cxl_nvdimm_release,
-       .groups = cxl_nvdimm_attribute_groups,
-};
-
-bool is_cxl_nvdimm(struct device *dev)
-{
-       return dev->type == &cxl_nvdimm_type;
-}
-EXPORT_SYMBOL_GPL(is_cxl_nvdimm);
-
-struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
-{
-       if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev),
-                         "not a cxl_nvdimm device\n"))
-               return NULL;
-       return container_of(dev, struct cxl_nvdimm, dev);
-}
-EXPORT_SYMBOL_GPL(to_cxl_nvdimm);
-
-static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
-{
-       struct cxl_nvdimm *cxl_nvd;
-       struct device *dev;
-
-       cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL);
-       if (!cxl_nvd)
-               return ERR_PTR(-ENOMEM);
-
-       dev = &cxl_nvd->dev;
-       cxl_nvd->cxlmd = cxlmd;
-       device_initialize(dev);
-       device_set_pm_not_required(dev);
-       dev->parent = &cxlmd->dev;
-       dev->bus = &cxl_bus_type;
-       dev->type = &cxl_nvdimm_type;
-
-       return cxl_nvd;
-}
-
-int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
-{
-       struct cxl_nvdimm *cxl_nvd;
-       struct device *dev;
-       int rc;
-
-       cxl_nvd = cxl_nvdimm_alloc(cxlmd);
-       if (IS_ERR(cxl_nvd))
-               return PTR_ERR(cxl_nvd);
-
-       dev = &cxl_nvd->dev;
-       rc = dev_set_name(dev, "pmem%d", cxlmd->id);
-       if (rc)
-               goto err;
-
-       rc = device_add(dev);
-       if (rc)
-               goto err;
-
-       dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
-               dev_name(dev));
-
-       return devm_add_action_or_reset(host, unregister_dev, dev);
-
-err:
-       put_device(dev);
-       return rc;
-}
-EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm);
-
-/**
- * cxl_probe_device_regs() - Detect CXL Device register blocks
- * @dev: Host device of the @base mapping
- * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface
- * @map: Map object describing the register block information found
- *
- * Probe for device register information and return it in map object.
- */
-void cxl_probe_device_regs(struct device *dev, void __iomem *base,
-                          struct cxl_device_reg_map *map)
-{
-       int cap, cap_count;
-       u64 cap_array;
-
-       *map = (struct cxl_device_reg_map){ 0 };
-
-       cap_array = readq(base + CXLDEV_CAP_ARRAY_OFFSET);
-       if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) !=
-           CXLDEV_CAP_ARRAY_CAP_ID)
-               return;
-
-       cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
-
-       for (cap = 1; cap <= cap_count; cap++) {
-               u32 offset, length;
-               u16 cap_id;
-
-               cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK,
-                                  readl(base + cap * 0x10));
-               offset = readl(base + cap * 0x10 + 0x4);
-               length = readl(base + cap * 0x10 + 0x8);
-
-               switch (cap_id) {
-               case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
-                       dev_dbg(dev, "found Status capability (0x%x)\n", offset);
-
-                       map->status.valid = true;
-                       map->status.offset = offset;
-                       map->status.size = length;
-                       break;
-               case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
-                       dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
-                       map->mbox.valid = true;
-                       map->mbox.offset = offset;
-                       map->mbox.size = length;
-                       break;
-               case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
-                       dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
-                       break;
-               case CXLDEV_CAP_CAP_ID_MEMDEV:
-                       dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
-                       map->memdev.valid = true;
-                       map->memdev.offset = offset;
-                       map->memdev.size = length;
-                       break;
-               default:
-                       if (cap_id >= 0x8000)
-                               dev_dbg(dev, "Vendor cap ID: %#x offset: %#x\n", cap_id, offset);
-                       else
-                               dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
-                       break;
-               }
-       }
-}
-EXPORT_SYMBOL_GPL(cxl_probe_device_regs);
-
-static void __iomem *devm_cxl_iomap_block(struct device *dev,
-                                         resource_size_t addr,
-                                         resource_size_t length)
-{
-       void __iomem *ret_val;
-       struct resource *res;
-
-       res = devm_request_mem_region(dev, addr, length, dev_name(dev));
-       if (!res) {
-               resource_size_t end = addr + length - 1;
-
-               dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &end);
-               return NULL;
-       }
-
-       ret_val = devm_ioremap(dev, addr, length);
-       if (!ret_val)
-               dev_err(dev, "Failed to map region %pr\n", res);
-
-       return ret_val;
-}
-
-int cxl_map_component_regs(struct pci_dev *pdev,
-                          struct cxl_component_regs *regs,
-                          struct cxl_register_map *map)
-{
-       struct device *dev = &pdev->dev;
-       resource_size_t phys_addr;
-       resource_size_t length;
-
-       phys_addr = pci_resource_start(pdev, map->barno);
-       phys_addr += map->block_offset;
-
-       phys_addr += map->component_map.hdm_decoder.offset;
-       length = map->component_map.hdm_decoder.size;
-       regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
-       if (!regs->hdm_decoder)
-               return -ENOMEM;
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(cxl_map_component_regs);
-
-int cxl_map_device_regs(struct pci_dev *pdev,
-                       struct cxl_device_regs *regs,
-                       struct cxl_register_map *map)
-{
-       struct device *dev = &pdev->dev;
-       resource_size_t phys_addr;
-
-       phys_addr = pci_resource_start(pdev, map->barno);
-       phys_addr += map->block_offset;
-
-       if (map->device_map.status.valid) {
-               resource_size_t addr;
-               resource_size_t length;
-
-               addr = phys_addr + map->device_map.status.offset;
-               length = map->device_map.status.size;
-               regs->status = devm_cxl_iomap_block(dev, addr, length);
-               if (!regs->status)
-                       return -ENOMEM;
-       }
-
-       if (map->device_map.mbox.valid) {
-               resource_size_t addr;
-               resource_size_t length;
-
-               addr = phys_addr + map->device_map.mbox.offset;
-               length = map->device_map.mbox.size;
-               regs->mbox = devm_cxl_iomap_block(dev, addr, length);
-               if (!regs->mbox)
-                       return -ENOMEM;
-       }
-
-       if (map->device_map.memdev.valid) {
-               resource_size_t addr;
-               resource_size_t length;
-
-               addr = phys_addr + map->device_map.memdev.offset;
-               length = map->device_map.memdev.size;
-               regs->memdev = devm_cxl_iomap_block(dev, addr, length);
-               if (!regs->memdev)
-                       return -ENOMEM;
-       }
-
-       return 0;
-}
-EXPORT_SYMBOL_GPL(cxl_map_device_regs);
-
-/**
- * __cxl_driver_register - register a driver for the cxl bus
- * @cxl_drv: cxl driver structure to attach
- * @owner: owning module/driver
- * @modname: KBUILD_MODNAME for parent driver
- */
-int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
-                         const char *modname)
-{
-       if (!cxl_drv->probe) {
-               pr_debug("%s ->probe() must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       if (!cxl_drv->name) {
-               pr_debug("%s ->name must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       if (!cxl_drv->id) {
-               pr_debug("%s ->id must be specified\n", modname);
-               return -EINVAL;
-       }
-
-       cxl_drv->drv.bus = &cxl_bus_type;
-       cxl_drv->drv.owner = owner;
-       cxl_drv->drv.mod_name = modname;
-       cxl_drv->drv.name = cxl_drv->name;
-
-       return driver_register(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_GPL(__cxl_driver_register);
-
-void cxl_driver_unregister(struct cxl_driver *cxl_drv)
-{
-       driver_unregister(&cxl_drv->drv);
-}
-EXPORT_SYMBOL_GPL(cxl_driver_unregister);
-
-static int cxl_device_id(struct device *dev)
-{
-       if (dev->type == &cxl_nvdimm_bridge_type)
-               return CXL_DEVICE_NVDIMM_BRIDGE;
-       if (dev->type == &cxl_nvdimm_type)
-               return CXL_DEVICE_NVDIMM;
-       return 0;
-}
-
-static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
-{
-       return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
-                             cxl_device_id(dev));
-}
-
-static int cxl_bus_match(struct device *dev, struct device_driver *drv)
-{
-       return cxl_device_id(dev) == to_cxl_drv(drv)->id;
-}
-
-static int cxl_bus_probe(struct device *dev)
-{
-       return to_cxl_drv(dev->driver)->probe(dev);
-}
-
-static void cxl_bus_remove(struct device *dev)
-{
-       struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
-
-       if (cxl_drv->remove)
-               cxl_drv->remove(dev);
-}
-
-struct bus_type cxl_bus_type = {
-       .name = "cxl",
-       .uevent = cxl_bus_uevent,
-       .match = cxl_bus_match,
-       .probe = cxl_bus_probe,
-       .remove = cxl_bus_remove,
-};
-EXPORT_SYMBOL_GPL(cxl_bus_type);
-
-static __init int cxl_core_init(void)
-{
-       return bus_register(&cxl_bus_type);
-}
-
-static void cxl_core_exit(void)
-{
-       bus_unregister(&cxl_bus_type);
-}
-
-module_init(cxl_core_init);
-module_exit(cxl_core_exit);
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/Makefile b/drivers/cxl/core/Makefile
new file mode 100644 (file)
index 0000000..0fdbf3c
--- /dev/null
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CXL_BUS) += cxl_core.o
+
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL -I$(srctree)/drivers/cxl
+cxl_core-y := bus.o
+cxl_core-y += pmem.o
+cxl_core-y += regs.o
+cxl_core-y += memdev.o
diff --git a/drivers/cxl/core/bus.c b/drivers/cxl/core/bus.c
new file mode 100644 (file)
index 0000000..267d804
--- /dev/null
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl core
+ *
+ * The CXL core provides a set of interfaces that can be consumed by CXL-aware
+ * drivers. The interfaces allow for creation, modification, and destruction of
+ * regions, memory devices, ports, and decoders. CXL-aware drivers must register
+ * with the CXL core via these interfaces in order to participate in
+ * cross-device interleave coordination. The CXL core also establishes and
+ * maintains the bridge to the nvdimm subsystem.
+ *
+ * The CXL core introduces a sysfs hierarchy to control the devices that are
+ * instantiated by the core.
+ */
+
+static DEFINE_IDA(cxl_port_ida);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+                           char *buf)
+{
+       return sysfs_emit(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static struct attribute *cxl_base_attributes[] = {
+       &dev_attr_devtype.attr,
+       NULL,
+};
+
+struct attribute_group cxl_base_attribute_group = {
+       .attrs = cxl_base_attributes,
+};
+
+static ssize_t start_show(struct device *dev, struct device_attribute *attr,
+                         char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+       return sysfs_emit(buf, "%#llx\n", cxld->range.start);
+}
+static DEVICE_ATTR_RO(start);
+
+static ssize_t size_show(struct device *dev, struct device_attribute *attr,
+                       char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+       return sysfs_emit(buf, "%#llx\n", range_len(&cxld->range));
+}
+static DEVICE_ATTR_RO(size);
+
+#define CXL_DECODER_FLAG_ATTR(name, flag)                            \
+static ssize_t name##_show(struct device *dev,                       \
+                          struct device_attribute *attr, char *buf) \
+{                                                                    \
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);              \
+                                                                     \
+       return sysfs_emit(buf, "%s\n",                               \
+                         (cxld->flags & (flag)) ? "1" : "0");       \
+}                                                                    \
+static DEVICE_ATTR_RO(name)
+
+CXL_DECODER_FLAG_ATTR(cap_pmem, CXL_DECODER_F_PMEM);
+CXL_DECODER_FLAG_ATTR(cap_ram, CXL_DECODER_F_RAM);
+CXL_DECODER_FLAG_ATTR(cap_type2, CXL_DECODER_F_TYPE2);
+CXL_DECODER_FLAG_ATTR(cap_type3, CXL_DECODER_F_TYPE3);
+CXL_DECODER_FLAG_ATTR(locked, CXL_DECODER_F_LOCK);
+
+static ssize_t target_type_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+
+       switch (cxld->target_type) {
+       case CXL_DECODER_ACCELERATOR:
+               return sysfs_emit(buf, "accelerator\n");
+       case CXL_DECODER_EXPANDER:
+               return sysfs_emit(buf, "expander\n");
+       }
+       return -ENXIO;
+}
+static DEVICE_ATTR_RO(target_type);
+
+static ssize_t target_list_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       ssize_t offset = 0;
+       int i, rc = 0;
+
+       device_lock(dev);
+       for (i = 0; i < cxld->interleave_ways; i++) {
+               struct cxl_dport *dport = cxld->target[i];
+               struct cxl_dport *next = NULL;
+
+               if (!dport)
+                       break;
+
+               if (i + 1 < cxld->interleave_ways)
+                       next = cxld->target[i + 1];
+               rc = sysfs_emit_at(buf, offset, "%d%s", dport->port_id,
+                                  next ? "," : "");
+               if (rc < 0)
+                       break;
+               offset += rc;
+       }
+       device_unlock(dev);
+
+       if (rc < 0)
+               return rc;
+
+       rc = sysfs_emit_at(buf, offset, "\n");
+       if (rc < 0)
+               return rc;
+
+       return offset + rc;
+}
+static DEVICE_ATTR_RO(target_list);
+
+static struct attribute *cxl_decoder_base_attrs[] = {
+       &dev_attr_start.attr,
+       &dev_attr_size.attr,
+       &dev_attr_locked.attr,
+       &dev_attr_target_list.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_base_attribute_group = {
+       .attrs = cxl_decoder_base_attrs,
+};
+
+static struct attribute *cxl_decoder_root_attrs[] = {
+       &dev_attr_cap_pmem.attr,
+       &dev_attr_cap_ram.attr,
+       &dev_attr_cap_type2.attr,
+       &dev_attr_cap_type3.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_root_attribute_group = {
+       .attrs = cxl_decoder_root_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_root_attribute_groups[] = {
+       &cxl_decoder_root_attribute_group,
+       &cxl_decoder_base_attribute_group,
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static struct attribute *cxl_decoder_switch_attrs[] = {
+       &dev_attr_target_type.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_decoder_switch_attribute_group = {
+       .attrs = cxl_decoder_switch_attrs,
+};
+
+static const struct attribute_group *cxl_decoder_switch_attribute_groups[] = {
+       &cxl_decoder_switch_attribute_group,
+       &cxl_decoder_base_attribute_group,
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static void cxl_decoder_release(struct device *dev)
+{
+       struct cxl_decoder *cxld = to_cxl_decoder(dev);
+       struct cxl_port *port = to_cxl_port(dev->parent);
+
+       ida_free(&port->decoder_ida, cxld->id);
+       kfree(cxld);
+}
+
+static const struct device_type cxl_decoder_switch_type = {
+       .name = "cxl_decoder_switch",
+       .release = cxl_decoder_release,
+       .groups = cxl_decoder_switch_attribute_groups,
+};
+
+static const struct device_type cxl_decoder_root_type = {
+       .name = "cxl_decoder_root",
+       .release = cxl_decoder_release,
+       .groups = cxl_decoder_root_attribute_groups,
+};
+
+bool is_root_decoder(struct device *dev)
+{
+       return dev->type == &cxl_decoder_root_type;
+}
+EXPORT_SYMBOL_GPL(is_root_decoder);
+
+struct cxl_decoder *to_cxl_decoder(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, dev->type->release != cxl_decoder_release,
+                         "not a cxl_decoder device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_decoder, dev);
+}
+EXPORT_SYMBOL_GPL(to_cxl_decoder);
+
+static void cxl_dport_release(struct cxl_dport *dport)
+{
+       list_del(&dport->list);
+       put_device(dport->dport);
+       kfree(dport);
+}
+
+static void cxl_port_release(struct device *dev)
+{
+       struct cxl_port *port = to_cxl_port(dev);
+       struct cxl_dport *dport, *_d;
+
+       device_lock(dev);
+       list_for_each_entry_safe(dport, _d, &port->dports, list)
+               cxl_dport_release(dport);
+       device_unlock(dev);
+       ida_free(&cxl_port_ida, port->id);
+       kfree(port);
+}
+
+static const struct attribute_group *cxl_port_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+static const struct device_type cxl_port_type = {
+       .name = "cxl_port",
+       .release = cxl_port_release,
+       .groups = cxl_port_attribute_groups,
+};
+
+struct cxl_port *to_cxl_port(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, dev->type != &cxl_port_type,
+                         "not a cxl_port device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_port, dev);
+}
+
+static void unregister_port(void *_port)
+{
+       struct cxl_port *port = _port;
+       struct cxl_dport *dport;
+
+       device_lock(&port->dev);
+       list_for_each_entry(dport, &port->dports, list) {
+               char link_name[CXL_TARGET_STRLEN];
+
+               if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d",
+                            dport->port_id) >= CXL_TARGET_STRLEN)
+                       continue;
+               sysfs_remove_link(&port->dev.kobj, link_name);
+       }
+       device_unlock(&port->dev);
+       device_unregister(&port->dev);
+}
+
+static void cxl_unlink_uport(void *_port)
+{
+       struct cxl_port *port = _port;
+
+       sysfs_remove_link(&port->dev.kobj, "uport");
+}
+
+static int devm_cxl_link_uport(struct device *host, struct cxl_port *port)
+{
+       int rc;
+
+       rc = sysfs_create_link(&port->dev.kobj, &port->uport->kobj, "uport");
+       if (rc)
+               return rc;
+       return devm_add_action_or_reset(host, cxl_unlink_uport, port);
+}
+
+static struct cxl_port *cxl_port_alloc(struct device *uport,
+                                      resource_size_t component_reg_phys,
+                                      struct cxl_port *parent_port)
+{
+       struct cxl_port *port;
+       struct device *dev;
+       int rc;
+
+       port = kzalloc(sizeof(*port), GFP_KERNEL);
+       if (!port)
+               return ERR_PTR(-ENOMEM);
+
+       rc = ida_alloc(&cxl_port_ida, GFP_KERNEL);
+       if (rc < 0)
+               goto err;
+       port->id = rc;
+
+       /*
+        * The top-level cxl_port "cxl_root" does not have a cxl_port as
+        * its parent and it does not have any corresponding component
+        * registers as its decode is described by a fixed platform
+        * description.
+        */
+       dev = &port->dev;
+       if (parent_port)
+               dev->parent = &parent_port->dev;
+       else
+               dev->parent = uport;
+
+       port->uport = uport;
+       port->component_reg_phys = component_reg_phys;
+       ida_init(&port->decoder_ida);
+       INIT_LIST_HEAD(&port->dports);
+
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->bus = &cxl_bus_type;
+       dev->type = &cxl_port_type;
+
+       return port;
+
+err:
+       kfree(port);
+       return ERR_PTR(rc);
+}
+
+/**
+ * devm_cxl_add_port - register a cxl_port in CXL memory decode hierarchy
+ * @host: host device for devm operations
+ * @uport: "physical" device implementing this upstream port
+ * @component_reg_phys: (optional) for configurable cxl_port instances
+ * @parent_port: next hop up in the CXL memory decode hierarchy
+ */
+struct cxl_port *devm_cxl_add_port(struct device *host, struct device *uport,
+                                  resource_size_t component_reg_phys,
+                                  struct cxl_port *parent_port)
+{
+       struct cxl_port *port;
+       struct device *dev;
+       int rc;
+
+       port = cxl_port_alloc(uport, component_reg_phys, parent_port);
+       if (IS_ERR(port))
+               return port;
+
+       dev = &port->dev;
+       if (parent_port)
+               rc = dev_set_name(dev, "port%d", port->id);
+       else
+               rc = dev_set_name(dev, "root%d", port->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       rc = devm_add_action_or_reset(host, unregister_port, port);
+       if (rc)
+               return ERR_PTR(rc);
+
+       rc = devm_cxl_link_uport(host, port);
+       if (rc)
+               return ERR_PTR(rc);
+
+       return port;
+
+err:
+       put_device(dev);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_port);
+
+static struct cxl_dport *find_dport(struct cxl_port *port, int id)
+{
+       struct cxl_dport *dport;
+
+       device_lock_assert(&port->dev);
+       list_for_each_entry (dport, &port->dports, list)
+               if (dport->port_id == id)
+                       return dport;
+       return NULL;
+}
+
+static int add_dport(struct cxl_port *port, struct cxl_dport *new)
+{
+       struct cxl_dport *dup;
+
+       device_lock(&port->dev);
+       dup = find_dport(port, new->port_id);
+       if (dup)
+               dev_err(&port->dev,
+                       "unable to add dport%d-%s non-unique port id (%s)\n",
+                       new->port_id, dev_name(new->dport),
+                       dev_name(dup->dport));
+       else
+               list_add_tail(&new->list, &port->dports);
+       device_unlock(&port->dev);
+
+       return dup ? -EEXIST : 0;
+}
+
+/**
+ * cxl_add_dport - append downstream port data to a cxl_port
+ * @port: the cxl_port that references this dport
+ * @dport_dev: firmware or PCI device representing the dport
+ * @port_id: identifier for this dport in a decoder's target list
+ * @component_reg_phys: optional location of CXL component registers
+ *
+ * Note that all allocations and links are undone by cxl_port deletion
+ * and release.
+ */
+int cxl_add_dport(struct cxl_port *port, struct device *dport_dev, int port_id,
+                 resource_size_t component_reg_phys)
+{
+       char link_name[CXL_TARGET_STRLEN];
+       struct cxl_dport *dport;
+       int rc;
+
+       if (snprintf(link_name, CXL_TARGET_STRLEN, "dport%d", port_id) >=
+           CXL_TARGET_STRLEN)
+               return -EINVAL;
+
+       dport = kzalloc(sizeof(*dport), GFP_KERNEL);
+       if (!dport)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&dport->list);
+       dport->dport = get_device(dport_dev);
+       dport->port_id = port_id;
+       dport->component_reg_phys = component_reg_phys;
+       dport->port = port;
+
+       rc = add_dport(port, dport);
+       if (rc)
+               goto err;
+
+       rc = sysfs_create_link(&port->dev.kobj, &dport_dev->kobj, link_name);
+       if (rc)
+               goto err;
+
+       return 0;
+err:
+       cxl_dport_release(dport);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(cxl_add_dport);
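+
+/*
+ * Illustrative sketch (editor's addition, not part of this change): how a
+ * caller might combine devm_cxl_add_port() and cxl_add_dport() above.
+ * 'host', 'uport_dev' and 'bridge_dev' are hypothetical devices supplied by
+ * the caller; CXL_RESOURCE_NONE (cxl.h) stands in for "no component
+ * registers".
+ */
+#if 0 /* example only */
+static int example_add_root_port(struct device *host, struct device *uport_dev,
+                                struct device *bridge_dev)
+{
+       struct cxl_port *root;
+
+       /* root port: no parent cxl_port, no component registers */
+       root = devm_cxl_add_port(host, uport_dev, CXL_RESOURCE_NONE, NULL);
+       if (IS_ERR(root))
+               return PTR_ERR(root);
+
+       /* describe one downstream port with target id 0 */
+       return cxl_add_dport(root, bridge_dev, 0, CXL_RESOURCE_NONE);
+}
+#endif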
+
+static struct cxl_decoder *
+cxl_decoder_alloc(struct cxl_port *port, int nr_targets, resource_size_t base,
+                 resource_size_t len, int interleave_ways,
+                 int interleave_granularity, enum cxl_decoder_type type,
+                 unsigned long flags)
+{
+       struct cxl_decoder *cxld;
+       struct device *dev;
+       int rc = 0;
+
+       if (interleave_ways < 1)
+               return ERR_PTR(-EINVAL);
+
+       device_lock(&port->dev);
+       if (list_empty(&port->dports))
+               rc = -EINVAL;
+       device_unlock(&port->dev);
+       if (rc)
+               return ERR_PTR(rc);
+
+       cxld = kzalloc(struct_size(cxld, target, nr_targets), GFP_KERNEL);
+       if (!cxld)
+               return ERR_PTR(-ENOMEM);
+
+       rc = ida_alloc(&port->decoder_ida, GFP_KERNEL);
+       if (rc < 0)
+               goto err;
+
+       *cxld = (struct cxl_decoder) {
+               .id = rc,
+               .range = {
+                       .start = base,
+                       .end = base + len - 1,
+               },
+               .flags = flags,
+               .interleave_ways = interleave_ways,
+               .interleave_granularity = interleave_granularity,
+               .target_type = type,
+       };
+
+       /* handle implied target_list */
+       if (interleave_ways == 1)
+               cxld->target[0] =
+                       list_first_entry(&port->dports, struct cxl_dport, list);
+       dev = &cxld->dev;
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->parent = &port->dev;
+       dev->bus = &cxl_bus_type;
+
+       /* root ports do not have a cxl_port_type parent */
+       if (port->dev.parent->type == &cxl_port_type)
+               dev->type = &cxl_decoder_switch_type;
+       else
+               dev->type = &cxl_decoder_root_type;
+
+       return cxld;
+err:
+       kfree(cxld);
+       return ERR_PTR(rc);
+}
+
+struct cxl_decoder *
+devm_cxl_add_decoder(struct device *host, struct cxl_port *port, int nr_targets,
+                    resource_size_t base, resource_size_t len,
+                    int interleave_ways, int interleave_granularity,
+                    enum cxl_decoder_type type, unsigned long flags)
+{
+       struct cxl_decoder *cxld;
+       struct device *dev;
+       int rc;
+
+       cxld = cxl_decoder_alloc(port, nr_targets, base, len, interleave_ways,
+                                interleave_granularity, type, flags);
+       if (IS_ERR(cxld))
+               return cxld;
+
+       dev = &cxld->dev;
+       rc = dev_set_name(dev, "decoder%d.%d", port->id, cxld->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       rc = devm_add_action_or_reset(host, unregister_cxl_dev, dev);
+       if (rc)
+               return ERR_PTR(rc);
+       return cxld;
+
+err:
+       put_device(dev);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_decoder);
+
+/**
+ * __cxl_driver_register - register a driver for the cxl bus
+ * @cxl_drv: cxl driver structure to attach
+ * @owner: owning module/driver
+ * @modname: KBUILD_MODNAME for parent driver
+ */
+int __cxl_driver_register(struct cxl_driver *cxl_drv, struct module *owner,
+                         const char *modname)
+{
+       if (!cxl_drv->probe) {
+               pr_debug("%s ->probe() must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       if (!cxl_drv->name) {
+               pr_debug("%s ->name must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       if (!cxl_drv->id) {
+               pr_debug("%s ->id must be specified\n", modname);
+               return -EINVAL;
+       }
+
+       cxl_drv->drv.bus = &cxl_bus_type;
+       cxl_drv->drv.owner = owner;
+       cxl_drv->drv.mod_name = modname;
+       cxl_drv->drv.name = cxl_drv->name;
+
+       return driver_register(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_GPL(__cxl_driver_register);
+
+void cxl_driver_unregister(struct cxl_driver *cxl_drv)
+{
+       driver_unregister(&cxl_drv->drv);
+}
+EXPORT_SYMBOL_GPL(cxl_driver_unregister);
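+
+/*
+ * Illustrative sketch (editor's addition, not part of this change): a
+ * minimal driver binding against the cxl bus via the registration helpers
+ * above. The driver name, device id, and probe body are hypothetical; the
+ * cxl_driver_register()/cxl_driver_unregister() wrappers are assumed to
+ * come from cxl.h.
+ */
+#if 0 /* example only */
+static int example_probe(struct device *dev)
+{
+       /* driver-specific setup for the matched cxl device */
+       return 0;
+}
+
+static struct cxl_driver example_driver = {
+       .name = "cxl_example",
+       .probe = example_probe,
+       .id = CXL_DEVICE_NVDIMM,
+};
+
+static int __init example_init(void)
+{
+       return cxl_driver_register(&example_driver);
+}
+
+static void __exit example_exit(void)
+{
+       cxl_driver_unregister(&example_driver);
+}
+#endif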
+
+static int cxl_device_id(struct device *dev)
+{
+       if (dev->type == &cxl_nvdimm_bridge_type)
+               return CXL_DEVICE_NVDIMM_BRIDGE;
+       if (dev->type == &cxl_nvdimm_type)
+               return CXL_DEVICE_NVDIMM;
+       return 0;
+}
+
+static int cxl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+       return add_uevent_var(env, "MODALIAS=" CXL_MODALIAS_FMT,
+                             cxl_device_id(dev));
+}
+
+static int cxl_bus_match(struct device *dev, struct device_driver *drv)
+{
+       return cxl_device_id(dev) == to_cxl_drv(drv)->id;
+}
+
+static int cxl_bus_probe(struct device *dev)
+{
+       return to_cxl_drv(dev->driver)->probe(dev);
+}
+
+static void cxl_bus_remove(struct device *dev)
+{
+       struct cxl_driver *cxl_drv = to_cxl_drv(dev->driver);
+
+       if (cxl_drv->remove)
+               cxl_drv->remove(dev);
+}
+
+struct bus_type cxl_bus_type = {
+       .name = "cxl",
+       .uevent = cxl_bus_uevent,
+       .match = cxl_bus_match,
+       .probe = cxl_bus_probe,
+       .remove = cxl_bus_remove,
+};
+EXPORT_SYMBOL_GPL(cxl_bus_type);
+
+static __init int cxl_core_init(void)
+{
+       int rc;
+
+       rc = cxl_memdev_init();
+       if (rc)
+               return rc;
+
+       rc = bus_register(&cxl_bus_type);
+       if (rc)
+               goto err;
+       return 0;
+
+err:
+       cxl_memdev_exit();
+       return rc;
+}
+
+static void cxl_core_exit(void)
+{
+       bus_unregister(&cxl_bus_type);
+       cxl_memdev_exit();
+}
+
+module_init(cxl_core_init);
+module_exit(cxl_core_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
new file mode 100644 (file)
index 0000000..036a3c8
--- /dev/null
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. */
+
+#ifndef __CXL_CORE_H__
+#define __CXL_CORE_H__
+
+extern const struct device_type cxl_nvdimm_bridge_type;
+extern const struct device_type cxl_nvdimm_type;
+
+extern struct attribute_group cxl_base_attribute_group;
+
+static inline void unregister_cxl_dev(void *dev)
+{
+       device_unregister(dev);
+}
+
+int cxl_memdev_init(void);
+void cxl_memdev_exit(void);
+
+#endif /* __CXL_CORE_H__ */
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
new file mode 100644 (file)
index 0000000..a9c317e
--- /dev/null
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <cxlmem.h>
+#include "core.h"
+
+/*
+ * An entire PCI topology full of devices should be enough for any
+ * config
+ */
+#define CXL_MEM_MAX_DEVS 65536
+
+static int cxl_mem_major;
+static DEFINE_IDA(cxl_memdev_ida);
+
+static void cxl_memdev_release(struct device *dev)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+       ida_free(&cxl_memdev_ida, cxlmd->id);
+       kfree(cxlmd);
+}
+
+static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
+                               kgid_t *gid)
+{
+       return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
+}
+
+static ssize_t firmware_version_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+
+       return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static ssize_t payload_max_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+
+       return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
+}
+static DEVICE_ATTR_RO(payload_max);
+
+static ssize_t label_storage_size_show(struct device *dev,
+                                      struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+
+       return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
+}
+static DEVICE_ATTR_RO(label_storage_size);
+
+static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+       unsigned long long len = range_len(&cxlm->ram_range);
+
+       return sysfs_emit(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_ram_size =
+       __ATTR(size, 0444, ram_size_show, NULL);
+
+static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
+                             char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+       unsigned long long len = range_len(&cxlm->pmem_range);
+
+       return sysfs_emit(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_pmem_size =
+       __ATTR(size, 0444, pmem_size_show, NULL);
+
+static struct attribute *cxl_memdev_attributes[] = {
+       &dev_attr_firmware_version.attr,
+       &dev_attr_payload_max.attr,
+       &dev_attr_label_storage_size.attr,
+       NULL,
+};
+
+static struct attribute *cxl_memdev_pmem_attributes[] = {
+       &dev_attr_pmem_size.attr,
+       NULL,
+};
+
+static struct attribute *cxl_memdev_ram_attributes[] = {
+       &dev_attr_ram_size.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_memdev_attribute_group = {
+       .attrs = cxl_memdev_attributes,
+};
+
+static struct attribute_group cxl_memdev_ram_attribute_group = {
+       .name = "ram",
+       .attrs = cxl_memdev_ram_attributes,
+};
+
+static struct attribute_group cxl_memdev_pmem_attribute_group = {
+       .name = "pmem",
+       .attrs = cxl_memdev_pmem_attributes,
+};
+
+static const struct attribute_group *cxl_memdev_attribute_groups[] = {
+       &cxl_memdev_attribute_group,
+       &cxl_memdev_ram_attribute_group,
+       &cxl_memdev_pmem_attribute_group,
+       NULL,
+};
+
+static const struct device_type cxl_memdev_type = {
+       .name = "cxl_memdev",
+       .release = cxl_memdev_release,
+       .devnode = cxl_memdev_devnode,
+       .groups = cxl_memdev_attribute_groups,
+};
+
+static void cxl_memdev_unregister(void *_cxlmd)
+{
+       struct cxl_memdev *cxlmd = _cxlmd;
+       struct device *dev = &cxlmd->dev;
+       struct cdev *cdev = &cxlmd->cdev;
+       const struct cdevm_file_operations *cdevm_fops;
+
+       cdevm_fops = container_of(cdev->ops, typeof(*cdevm_fops), fops);
+       cdevm_fops->shutdown(dev);
+
+       cdev_device_del(&cxlmd->cdev, dev);
+       put_device(dev);
+}
+
+static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm,
+                                          const struct file_operations *fops)
+{
+       struct pci_dev *pdev = cxlm->pdev;
+       struct cxl_memdev *cxlmd;
+       struct device *dev;
+       struct cdev *cdev;
+       int rc;
+
+       cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
+       if (!cxlmd)
+               return ERR_PTR(-ENOMEM);
+
+       rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
+       if (rc < 0)
+               goto err;
+       cxlmd->id = rc;
+
+       dev = &cxlmd->dev;
+       device_initialize(dev);
+       dev->parent = &pdev->dev;
+       dev->bus = &cxl_bus_type;
+       dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
+       dev->type = &cxl_memdev_type;
+       device_set_pm_not_required(dev);
+
+       cdev = &cxlmd->cdev;
+       cdev_init(cdev, fops);
+       return cxlmd;
+
+err:
+       kfree(cxlmd);
+       return ERR_PTR(rc);
+}
+
+struct cxl_memdev *
+devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
+                   const struct cdevm_file_operations *cdevm_fops)
+{
+       struct cxl_memdev *cxlmd;
+       struct device *dev;
+       struct cdev *cdev;
+       int rc;
+
+       cxlmd = cxl_memdev_alloc(cxlm, &cdevm_fops->fops);
+       if (IS_ERR(cxlmd))
+               return cxlmd;
+
+       dev = &cxlmd->dev;
+       rc = dev_set_name(dev, "mem%d", cxlmd->id);
+       if (rc)
+               goto err;
+
+       /*
+        * Activate ioctl operations; no cxl_memdev_rwsem manipulation is
+        * needed as this is ordered with cdev_add() publishing the device.
+        */
+       cxlmd->cxlm = cxlm;
+
+       cdev = &cxlmd->cdev;
+       rc = cdev_device_add(cdev, dev);
+       if (rc)
+               goto err;
+
+       rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
+       if (rc)
+               return ERR_PTR(rc);
+       return cxlmd;
+
+err:
+       /*
+        * The cdev was briefly live; shut down any ioctl operations that
+        * saw that state.
+        */
+       cdevm_fops->shutdown(dev);
+       put_device(dev);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_memdev);
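+
+/*
+ * Illustrative sketch (editor's addition, not part of this change): how the
+ * PCI driver side might publish a memdev with devm_cxl_add_memdev() above.
+ * The fops contents and shutdown callback are hypothetical placeholders
+ * supplied by the caller; the cdevm_file_operations layout is inferred from
+ * its use in cxl_memdev_unregister().
+ */
+#if 0 /* example only */
+static void example_shutdown(struct device *dev)
+{
+       /* invalidate any state that in-flight ioctls may still reference */
+}
+
+static const struct cdevm_file_operations example_cdevm_fops = {
+       .fops = {
+               .owner = THIS_MODULE,
+               /* .unlocked_ioctl = ... (caller supplied) */
+       },
+       .shutdown = example_shutdown,
+};
+
+static int example_publish(struct device *host, struct cxl_mem *cxlm)
+{
+       struct cxl_memdev *cxlmd;
+
+       cxlmd = devm_cxl_add_memdev(host, cxlm, &example_cdevm_fops);
+       return PTR_ERR_OR_ZERO(cxlmd);
+}
+#endif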
+
+__init int cxl_memdev_init(void)
+{
+       dev_t devt;
+       int rc;
+
+       rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
+       if (rc)
+               return rc;
+
+       cxl_mem_major = MAJOR(devt);
+
+       return 0;
+}
+
+void cxl_memdev_exit(void)
+{
+       unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
+}
diff --git a/drivers/cxl/core/pmem.c b/drivers/cxl/core/pmem.c
new file mode 100644 (file)
index 0000000..d24570f
--- /dev/null
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <cxlmem.h>
+#include <cxl.h>
+#include "core.h"
+
+/**
+ * DOC: cxl pmem
+ *
+ * The core CXL PMEM infrastructure supports persistent memory
+ * provisioning and serves as a bridge to the LIBNVDIMM subsystem. A CXL
+ * 'bridge' device is added at the root of a CXL device topology if
+ * platform firmware advertises at least one persistent-memory-capable
+ * CXL window. That root-level bridge corresponds to a LIBNVDIMM 'bus'
+ * device. Then, for each cxl_memdev in the CXL device topology, a bridge
+ * device is added to host a LIBNVDIMM dimm object. When these bridges
+ * are registered, native LIBNVDIMM uapis are translated to CXL
+ * operations, for example, namespace label access commands.
+ */
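+
+/*
+ * Illustrative sketch (editor's addition, not part of this change): the
+ * topology described above is built with the two helpers defined later in
+ * this file -- one bridge at the root port, plus one cxl_nvdimm per memdev.
+ * 'host', 'root_port', and 'cxlmd' are hypothetical objects owned by the
+ * caller.
+ */
+#if 0 /* example only */
+static int example_bridge_topology(struct device *host,
+                                  struct cxl_port *root_port,
+                                  struct cxl_memdev *cxlmd)
+{
+       struct cxl_nvdimm_bridge *cxl_nvb;
+
+       /* LIBNVDIMM 'bus' anchor at the root of the CXL topology */
+       cxl_nvb = devm_cxl_add_nvdimm_bridge(host, root_port);
+       if (IS_ERR(cxl_nvb))
+               return PTR_ERR(cxl_nvb);
+
+       /* LIBNVDIMM 'dimm' proxy for one CXL memory device */
+       return devm_cxl_add_nvdimm(host, cxlmd);
+}
+#endif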
+
+static void cxl_nvdimm_bridge_release(struct device *dev)
+{
+       struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);
+
+       kfree(cxl_nvb);
+}
+
+static const struct attribute_group *cxl_nvdimm_bridge_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+const struct device_type cxl_nvdimm_bridge_type = {
+       .name = "cxl_nvdimm_bridge",
+       .release = cxl_nvdimm_bridge_release,
+       .groups = cxl_nvdimm_bridge_attribute_groups,
+};
+
+struct cxl_nvdimm_bridge *to_cxl_nvdimm_bridge(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, dev->type != &cxl_nvdimm_bridge_type,
+                         "not a cxl_nvdimm_bridge device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_nvdimm_bridge, dev);
+}
+EXPORT_SYMBOL_GPL(to_cxl_nvdimm_bridge);
+
+static struct cxl_nvdimm_bridge *
+cxl_nvdimm_bridge_alloc(struct cxl_port *port)
+{
+       struct cxl_nvdimm_bridge *cxl_nvb;
+       struct device *dev;
+
+       cxl_nvb = kzalloc(sizeof(*cxl_nvb), GFP_KERNEL);
+       if (!cxl_nvb)
+               return ERR_PTR(-ENOMEM);
+
+       dev = &cxl_nvb->dev;
+       cxl_nvb->port = port;
+       cxl_nvb->state = CXL_NVB_NEW;
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->parent = &port->dev;
+       dev->bus = &cxl_bus_type;
+       dev->type = &cxl_nvdimm_bridge_type;
+
+       return cxl_nvb;
+}
+
+static void unregister_nvb(void *_cxl_nvb)
+{
+       struct cxl_nvdimm_bridge *cxl_nvb = _cxl_nvb;
+       bool flush;
+
+       /*
+        * If the bridge was ever activated then there might be in-flight state
+        * work to flush. Once the state has been changed to 'dead', no new
+        * work can be queued by user-triggered bind.
+        */
+       device_lock(&cxl_nvb->dev);
+       flush = cxl_nvb->state != CXL_NVB_NEW;
+       cxl_nvb->state = CXL_NVB_DEAD;
+       device_unlock(&cxl_nvb->dev);
+
+       /*
+        * Even though the device core will trigger device_release_driver()
+        * before the unregister, it does not know about the fact that
+        * cxl_nvdimm_bridge_driver defers ->remove() work. So, do the driver
+        * release now and flush it before tearing down the nvdimm device
+        * hierarchy.
+        */
+       device_release_driver(&cxl_nvb->dev);
+       if (flush)
+               flush_work(&cxl_nvb->state_work);
+       device_unregister(&cxl_nvb->dev);
+}
+
+/**
+ * devm_cxl_add_nvdimm_bridge() - add the root of a LIBNVDIMM topology
+ * @host: platform firmware root device
+ * @port: CXL port at the root of a CXL topology
+ *
+ * Return: bridge device that can host cxl_nvdimm objects
+ */
+struct cxl_nvdimm_bridge *devm_cxl_add_nvdimm_bridge(struct device *host,
+                                                    struct cxl_port *port)
+{
+       struct cxl_nvdimm_bridge *cxl_nvb;
+       struct device *dev;
+       int rc;
+
+       if (!IS_ENABLED(CONFIG_CXL_PMEM))
+               return ERR_PTR(-ENXIO);
+
+       cxl_nvb = cxl_nvdimm_bridge_alloc(port);
+       if (IS_ERR(cxl_nvb))
+               return cxl_nvb;
+
+       dev = &cxl_nvb->dev;
+       rc = dev_set_name(dev, "nvdimm-bridge");
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       rc = devm_add_action_or_reset(host, unregister_nvb, cxl_nvb);
+       if (rc)
+               return ERR_PTR(rc);
+
+       return cxl_nvb;
+
+err:
+       put_device(dev);
+       return ERR_PTR(rc);
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm_bridge);
+
+static void cxl_nvdimm_release(struct device *dev)
+{
+       struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
+
+       kfree(cxl_nvd);
+}
+
+static const struct attribute_group *cxl_nvdimm_attribute_groups[] = {
+       &cxl_base_attribute_group,
+       NULL,
+};
+
+const struct device_type cxl_nvdimm_type = {
+       .name = "cxl_nvdimm",
+       .release = cxl_nvdimm_release,
+       .groups = cxl_nvdimm_attribute_groups,
+};
+
+bool is_cxl_nvdimm(struct device *dev)
+{
+       return dev->type == &cxl_nvdimm_type;
+}
+EXPORT_SYMBOL_GPL(is_cxl_nvdimm);
+
+struct cxl_nvdimm *to_cxl_nvdimm(struct device *dev)
+{
+       if (dev_WARN_ONCE(dev, !is_cxl_nvdimm(dev),
+                         "not a cxl_nvdimm device\n"))
+               return NULL;
+       return container_of(dev, struct cxl_nvdimm, dev);
+}
+EXPORT_SYMBOL_GPL(to_cxl_nvdimm);
+
+static struct cxl_nvdimm *cxl_nvdimm_alloc(struct cxl_memdev *cxlmd)
+{
+       struct cxl_nvdimm *cxl_nvd;
+       struct device *dev;
+
+       cxl_nvd = kzalloc(sizeof(*cxl_nvd), GFP_KERNEL);
+       if (!cxl_nvd)
+               return ERR_PTR(-ENOMEM);
+
+       dev = &cxl_nvd->dev;
+       cxl_nvd->cxlmd = cxlmd;
+       device_initialize(dev);
+       device_set_pm_not_required(dev);
+       dev->parent = &cxlmd->dev;
+       dev->bus = &cxl_bus_type;
+       dev->type = &cxl_nvdimm_type;
+
+       return cxl_nvd;
+}
+
+/**
+ * devm_cxl_add_nvdimm() - add a bridge between a cxl_memdev and an nvdimm
+ * @host: same host as @cxlmd
+ * @cxlmd: cxl_memdev instance that will perform LIBNVDIMM operations
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int devm_cxl_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
+{
+       struct cxl_nvdimm *cxl_nvd;
+       struct device *dev;
+       int rc;
+
+       cxl_nvd = cxl_nvdimm_alloc(cxlmd);
+       if (IS_ERR(cxl_nvd))
+               return PTR_ERR(cxl_nvd);
+
+       dev = &cxl_nvd->dev;
+       rc = dev_set_name(dev, "pmem%d", cxlmd->id);
+       if (rc)
+               goto err;
+
+       rc = device_add(dev);
+       if (rc)
+               goto err;
+
+       dev_dbg(host, "%s: register %s\n", dev_name(dev->parent),
+               dev_name(dev));
+
+       return devm_add_action_or_reset(host, unregister_cxl_dev, dev);
+
+err:
+       put_device(dev);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(devm_cxl_add_nvdimm);
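
For orientation, here is a minimal usage sketch of the two helpers added above, not part of this patch: a root-level caller (for example the ACPI driver) registers the bridge against the root port, and a per-memdev caller registers the pmem%d device. The example_* function names are illustrative assumptions.

/* Illustrative sketch only, not part of this patch. */
#include <linux/device.h>
#include "cxlmem.h"
#include "cxl.h"

/* Root level: create the LIBNVDIMM 'bus' bridge under the root port. */
static int example_add_bridge(struct device *host, struct cxl_port *root_port)
{
	struct cxl_nvdimm_bridge *cxl_nvb;

	cxl_nvb = devm_cxl_add_nvdimm_bridge(host, root_port);
	if (IS_ERR(cxl_nvb))
		return PTR_ERR(cxl_nvb); /* -ENXIO when CONFIG_CXL_PMEM=n */
	return 0;
}

/* Per memdev: create the 'pmem%d' device that will host an nvdimm. */
static int example_add_nvdimm(struct device *host, struct cxl_memdev *cxlmd)
{
	return devm_cxl_add_nvdimm(host, cxlmd);
}
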
diff --git a/drivers/cxl/core/regs.c b/drivers/cxl/core/regs.c
new file mode 100644 (file)
index 0000000..41de4a1
--- /dev/null
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. */
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <cxlmem.h>
+
+/**
+ * DOC: cxl registers
+ *
+ * CXL device capabilities are enumerated by PCI DVSEC (Designated
+ * Vendor-specific) and / or descriptors provided by platform firmware.
+ * They can be defined as a set like the device and component registers
+ * mandated by CXL Section 8.1.12.2 Memory Device PCIe Capabilities and
+ * Extended Capabilities, or they can be individual capabilities
+ * appended to bridged and endpoint devices.
+ *
+ * Provide common infrastructure for enumerating and mapping these
+ * discrete capabilities.
+ */
+
+/**
+ * cxl_probe_component_regs() - Detect CXL Component register blocks
+ * @dev: Host device of the @base mapping
+ * @base: Mapping containing the HDM Decoder Capability Header
+ * @map: Map object describing the register block information found
+ *
+ * See CXL 2.0 8.2.4 Component Register Layout and Definition
+ * See CXL 2.0 8.2.5.5 CXL Device Register Interface
+ *
+ * Probe for component register information and return it in map object.
+ */
+void cxl_probe_component_regs(struct device *dev, void __iomem *base,
+                             struct cxl_component_reg_map *map)
+{
+       int cap, cap_count;
+       u64 cap_array;
+
+       *map = (struct cxl_component_reg_map) { 0 };
+
+       /*
+        * CXL.cache and CXL.mem registers are at offset 0x1000 as defined in
+        * CXL 2.0 8.2.4 Table 141.
+        */
+       base += CXL_CM_OFFSET;
+
+       cap_array = readq(base + CXL_CM_CAP_HDR_OFFSET);
+
+       if (FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, cap_array) != CM_CAP_HDR_CAP_ID) {
+               dev_err(dev,
+                       "Couldn't locate the CXL.cache and CXL.mem capability array header./n");
+               return;
+       }
+
+       /* It's assumed that future versions will be backward compatible */
+       cap_count = FIELD_GET(CXL_CM_CAP_HDR_ARRAY_SIZE_MASK, cap_array);
+
+       for (cap = 1; cap <= cap_count; cap++) {
+               void __iomem *register_block;
+               u32 hdr;
+               int decoder_cnt;
+               u16 cap_id, offset;
+               u32 length;
+
+               hdr = readl(base + cap * 0x4);
+
+               cap_id = FIELD_GET(CXL_CM_CAP_HDR_ID_MASK, hdr);
+               offset = FIELD_GET(CXL_CM_CAP_PTR_MASK, hdr);
+               register_block = base + offset;
+
+               switch (cap_id) {
+               case CXL_CM_CAP_CAP_ID_HDM:
+                       dev_dbg(dev, "found HDM decoder capability (0x%x)\n",
+                               offset);
+
+                       hdr = readl(register_block);
+
+                       decoder_cnt = cxl_hdm_decoder_count(hdr);
+                       length = 0x20 * decoder_cnt + 0x10;
+
+                       map->hdm_decoder.valid = true;
+                       map->hdm_decoder.offset = CXL_CM_OFFSET + offset;
+                       map->hdm_decoder.size = length;
+                       break;
+               default:
+                       dev_dbg(dev, "Unknown CM cap ID: %d (0x%x)\n", cap_id,
+                               offset);
+                       break;
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(cxl_probe_component_regs);
+
+/**
+ * cxl_probe_device_regs() - Detect CXL Device register blocks
+ * @dev: Host device of the @base mapping
+ * @base: Mapping of CXL 2.0 8.2.8 CXL Device Register Interface
+ * @map: Map object describing the register block information found
+ *
+ * Probe for device register information and return it in map object.
+ */
+void cxl_probe_device_regs(struct device *dev, void __iomem *base,
+                          struct cxl_device_reg_map *map)
+{
+       int cap, cap_count;
+       u64 cap_array;
+
+       *map = (struct cxl_device_reg_map){ 0 };
+
+       cap_array = readq(base + CXLDEV_CAP_ARRAY_OFFSET);
+       if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) !=
+           CXLDEV_CAP_ARRAY_CAP_ID)
+               return;
+
+       cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
+
+       for (cap = 1; cap <= cap_count; cap++) {
+               u32 offset, length;
+               u16 cap_id;
+
+               cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK,
+                                  readl(base + cap * 0x10));
+               offset = readl(base + cap * 0x10 + 0x4);
+               length = readl(base + cap * 0x10 + 0x8);
+
+               switch (cap_id) {
+               case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
+                       dev_dbg(dev, "found Status capability (0x%x)\n", offset);
+
+                       map->status.valid = true;
+                       map->status.offset = offset;
+                       map->status.size = length;
+                       break;
+               case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
+                       dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
+                       map->mbox.valid = true;
+                       map->mbox.offset = offset;
+                       map->mbox.size = length;
+                       break;
+               case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
+                       dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
+                       break;
+               case CXLDEV_CAP_CAP_ID_MEMDEV:
+                       dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
+                       map->memdev.valid = true;
+                       map->memdev.offset = offset;
+                       map->memdev.size = length;
+                       break;
+               default:
+                       if (cap_id >= 0x8000)
+                               dev_dbg(dev, "Vendor cap ID: %#x offset: %#x\n", cap_id, offset);
+                       else
+                               dev_dbg(dev, "Unknown cap ID: %#x offset: %#x\n", cap_id, offset);
+                       break;
+               }
+       }
+}
+EXPORT_SYMBOL_GPL(cxl_probe_device_regs);
+
+static void __iomem *devm_cxl_iomap_block(struct device *dev,
+                                         resource_size_t addr,
+                                         resource_size_t length)
+{
+       void __iomem *ret_val;
+       struct resource *res;
+
+       res = devm_request_mem_region(dev, addr, length, dev_name(dev));
+       if (!res) {
+               resource_size_t end = addr + length - 1;
+
+               dev_err(dev, "Failed to request region %pa-%pa\n", &addr, &end);
+               return NULL;
+       }
+
+       ret_val = devm_ioremap(dev, addr, length);
+       if (!ret_val)
+               dev_err(dev, "Failed to map region %pr\n", res);
+
+       return ret_val;
+}
+
+int cxl_map_component_regs(struct pci_dev *pdev,
+                          struct cxl_component_regs *regs,
+                          struct cxl_register_map *map)
+{
+       struct device *dev = &pdev->dev;
+       resource_size_t phys_addr;
+       resource_size_t length;
+
+       phys_addr = pci_resource_start(pdev, map->barno);
+       phys_addr += map->block_offset;
+
+       phys_addr += map->component_map.hdm_decoder.offset;
+       length = map->component_map.hdm_decoder.size;
+       regs->hdm_decoder = devm_cxl_iomap_block(dev, phys_addr, length);
+       if (!regs->hdm_decoder)
+               return -ENOMEM;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_map_component_regs);
+
+int cxl_map_device_regs(struct pci_dev *pdev,
+                       struct cxl_device_regs *regs,
+                       struct cxl_register_map *map)
+{
+       struct device *dev = &pdev->dev;
+       resource_size_t phys_addr;
+
+       phys_addr = pci_resource_start(pdev, map->barno);
+       phys_addr += map->block_offset;
+
+       if (map->device_map.status.valid) {
+               resource_size_t addr;
+               resource_size_t length;
+
+               addr = phys_addr + map->device_map.status.offset;
+               length = map->device_map.status.size;
+               regs->status = devm_cxl_iomap_block(dev, addr, length);
+               if (!regs->status)
+                       return -ENOMEM;
+       }
+
+       if (map->device_map.mbox.valid) {
+               resource_size_t addr;
+               resource_size_t length;
+
+               addr = phys_addr + map->device_map.mbox.offset;
+               length = map->device_map.mbox.size;
+               regs->mbox = devm_cxl_iomap_block(dev, addr, length);
+               if (!regs->mbox)
+                       return -ENOMEM;
+       }
+
+       if (map->device_map.memdev.valid) {
+               resource_size_t addr;
+               resource_size_t length;
+
+               addr = phys_addr + map->device_map.memdev.offset;
+               length = map->device_map.memdev.size;
+               regs->memdev = devm_cxl_iomap_block(dev, addr, length);
+               if (!regs->memdev)
+                       return -ENOMEM;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_map_device_regs);
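
A rough sketch of how the probe/map pair above is meant to be used, assuming @base is a temporary mapping of a device register block and @map->barno / @map->block_offset were already filled in from the caller's register-locator walk (as cxl_mem_setup_regs() does further down). Not part of this patch; example_* is a hypothetical name.

/* Illustrative sketch only, not part of this patch. */
static int example_setup_device_regs(struct pci_dev *pdev, void __iomem *base,
				     struct cxl_register_map *map,
				     struct cxl_device_regs *regs)
{
	/* Enumerate the capability array behind @base into @map. */
	cxl_probe_device_regs(&pdev->dev, base, &map->device_map);

	/* The status, mailbox and memdev blocks are all required. */
	if (!map->device_map.status.valid || !map->device_map.mbox.valid ||
	    !map->device_map.memdev.valid)
		return -ENXIO;

	/* Create devm-managed mappings for the blocks that were found. */
	return cxl_map_device_regs(pdev, regs, map);
}
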
index b6bda39..53927f9 100644 (file)
@@ -140,7 +140,6 @@ struct cxl_device_reg_map {
 };
 
 struct cxl_register_map {
-       struct list_head list;
        u64 block_offset;
        u8 reg_type;
        u8 barno;
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
new file mode 100644 (file)
index 0000000..6c0b1e2
--- /dev/null
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020-2021 Intel Corporation. */
+#ifndef __CXL_MEM_H__
+#define __CXL_MEM_H__
+#include <linux/cdev.h>
+#include "cxl.h"
+
+/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
+#define CXLMDEV_STATUS_OFFSET 0x0
+#define   CXLMDEV_DEV_FATAL BIT(0)
+#define   CXLMDEV_FW_HALT BIT(1)
+#define   CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2)
+#define     CXLMDEV_MS_NOT_READY 0
+#define     CXLMDEV_MS_READY 1
+#define     CXLMDEV_MS_ERROR 2
+#define     CXLMDEV_MS_DISABLED 3
+#define CXLMDEV_READY(status)                                                  \
+       (FIELD_GET(CXLMDEV_STATUS_MEDIA_STATUS_MASK, status) ==                \
+        CXLMDEV_MS_READY)
+#define   CXLMDEV_MBOX_IF_READY BIT(4)
+#define   CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5)
+#define     CXLMDEV_RESET_NEEDED_NOT 0
+#define     CXLMDEV_RESET_NEEDED_COLD 1
+#define     CXLMDEV_RESET_NEEDED_WARM 2
+#define     CXLMDEV_RESET_NEEDED_HOT 3
+#define     CXLMDEV_RESET_NEEDED_CXL 4
+#define CXLMDEV_RESET_NEEDED(status)                                           \
+       (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
+        CXLMDEV_RESET_NEEDED_NOT)
+
+/**
+ * struct cdevm_file_operations - devm coordinated cdev file operations
+ * @fops: file operations that are synchronized against @shutdown
+ * @shutdown: disconnect driver data
+ *
+ * @shutdown is invoked in the devres release path to disconnect any
+ * driver instance data from @dev. It assumes synchronization with any
+ * fops operation that requires driver data. After @shutdown an
+ * operation may only reference @device data.
+ */
+struct cdevm_file_operations {
+       struct file_operations fops;
+       void (*shutdown)(struct device *dev);
+};
+
+/**
+ * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
+ * @dev: driver core device object
+ * @cdev: char dev core object for ioctl operations
+ * @cxlm: pointer to the parent device driver data
+ * @id: id number of this memdev instance.
+ */
+struct cxl_memdev {
+       struct device dev;
+       struct cdev cdev;
+       struct cxl_mem *cxlm;
+       int id;
+};
+
+static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
+{
+       return container_of(dev, struct cxl_memdev, dev);
+}
+
+struct cxl_memdev *
+devm_cxl_add_memdev(struct device *host, struct cxl_mem *cxlm,
+                   const struct cdevm_file_operations *cdevm_fops);
+
+/**
+ * struct cxl_mem - A CXL memory device
+ * @pdev: The PCI device associated with this CXL device.
+ * @cxlmd: Logical memory device chardev / interface
+ * @regs: Parsed register blocks
+ * @payload_size: Size of space for payload
+ *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
+ * @lsa_size: Size of Label Storage Area
+ *                (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
+ * @mbox_mutex: Mutex to synchronize mailbox access.
+ * @firmware_version: Firmware version for the memory device.
+ * @enabled_cmds: Hardware commands found enabled in CEL.
+ * @pmem_range: Persistent memory capacity information.
+ * @ram_range: Volatile memory capacity information.
+ */
+struct cxl_mem {
+       struct pci_dev *pdev;
+       struct cxl_memdev *cxlmd;
+
+       struct cxl_regs regs;
+
+       size_t payload_size;
+       size_t lsa_size;
+       struct mutex mbox_mutex; /* Protects device mailbox and firmware */
+       char firmware_version[0x10];
+       unsigned long *enabled_cmds;
+
+       struct range pmem_range;
+       struct range ram_range;
+       u64 total_bytes;
+       u64 volatile_only_bytes;
+       u64 persistent_only_bytes;
+       u64 partition_align_bytes;
+
+       u64 active_volatile_bytes;
+       u64 active_persistent_bytes;
+       u64 next_volatile_bytes;
+       u64 next_persistent_bytes;
+};
+#endif /* __CXL_MEM_H__ */
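
The @shutdown contract documented for struct cdevm_file_operations above implies that every fops entry point must re-validate the driver data under the same lock that the provider's shutdown path takes. A hedged read-side sketch, assuming open() stashed the memdev in file->private_data and reusing the cxl_memdev_rwsem name from pci.c; the handler name is hypothetical.

/* Illustrative sketch only, not part of this patch. */
static long example_memdev_ioctl(struct file *file, unsigned int cmd,
				 unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	long rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlm)	/* NULLed by ->shutdown() on unregister */
		rc = 0;		/* ...dispatch to the real handler here... */
	up_read(&cxl_memdev_rwsem);

	return rc;
}
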
diff --git a/drivers/cxl/mem.h b/drivers/cxl/mem.h
deleted file mode 100644 (file)
index 8f02d02..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/* Copyright(c) 2020-2021 Intel Corporation. */
-#ifndef __CXL_MEM_H__
-#define __CXL_MEM_H__
-#include <linux/cdev.h>
-#include "cxl.h"
-
-/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
-#define CXLMDEV_STATUS_OFFSET 0x0
-#define   CXLMDEV_DEV_FATAL BIT(0)
-#define   CXLMDEV_FW_HALT BIT(1)
-#define   CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2)
-#define     CXLMDEV_MS_NOT_READY 0
-#define     CXLMDEV_MS_READY 1
-#define     CXLMDEV_MS_ERROR 2
-#define     CXLMDEV_MS_DISABLED 3
-#define CXLMDEV_READY(status)                                                  \
-       (FIELD_GET(CXLMDEV_STATUS_MEDIA_STATUS_MASK, status) ==                \
-        CXLMDEV_MS_READY)
-#define   CXLMDEV_MBOX_IF_READY BIT(4)
-#define   CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5)
-#define     CXLMDEV_RESET_NEEDED_NOT 0
-#define     CXLMDEV_RESET_NEEDED_COLD 1
-#define     CXLMDEV_RESET_NEEDED_WARM 2
-#define     CXLMDEV_RESET_NEEDED_HOT 3
-#define     CXLMDEV_RESET_NEEDED_CXL 4
-#define CXLMDEV_RESET_NEEDED(status)                                           \
-       (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
-        CXLMDEV_RESET_NEEDED_NOT)
-
-/*
- * An entire PCI topology full of devices should be enough for any
- * config
- */
-#define CXL_MEM_MAX_DEVS 65536
-
-/**
- * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
- * @dev: driver core device object
- * @cdev: char dev core object for ioctl operations
- * @cxlm: pointer to the parent device driver data
- * @id: id number of this memdev instance.
- */
-struct cxl_memdev {
-       struct device dev;
-       struct cdev cdev;
-       struct cxl_mem *cxlm;
-       int id;
-};
-
-/**
- * struct cxl_mem - A CXL memory device
- * @pdev: The PCI device associated with this CXL device.
- * @cxlmd: Logical memory device chardev / interface
- * @regs: Parsed register blocks
- * @payload_size: Size of space for payload
- *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
- * @lsa_size: Size of Label Storage Area
- *                (CXL 2.0 8.2.9.5.1.1 Identify Memory Device)
- * @mbox_mutex: Mutex to synchronize mailbox access.
- * @firmware_version: Firmware version for the memory device.
- * @enabled_cmds: Hardware commands found enabled in CEL.
- * @pmem_range: Persistent memory capacity information.
- * @ram_range: Volatile memory capacity information.
- */
-struct cxl_mem {
-       struct pci_dev *pdev;
-       struct cxl_memdev *cxlmd;
-
-       struct cxl_regs regs;
-
-       size_t payload_size;
-       size_t lsa_size;
-       struct mutex mbox_mutex; /* Protects device mailbox and firmware */
-       char firmware_version[0x10];
-       unsigned long *enabled_cmds;
-
-       struct range pmem_range;
-       struct range ram_range;
-};
-#endif /* __CXL_MEM_H__ */
index 4cf351a..8e45aa0 100644 (file)
@@ -12,9 +12,9 @@
 #include <linux/pci.h>
 #include <linux/io.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include "cxlmem.h"
 #include "pci.h"
 #include "cxl.h"
-#include "mem.h"
 
 /**
  * DOC: cxl pci
@@ -64,6 +64,15 @@ enum opcode {
        CXL_MBOX_OP_MAX                 = 0x10000
 };
 
+/*
+ * CXL 2.0 - Memory capacity multiplier
+ * See Section 8.2.9.5
+ *
+ * Volatile, Persistent, and Partition capacities are specified to be in
+ * multiples of 256MB - define a multiplier to convert to/from bytes.
+ */
+#define CXL_CAPACITY_MULTIPLIER SZ_256M
+
 /**
  * struct mbox_cmd - A command to be submitted to hardware.
  * @opcode: (input) The command set and command submitted to hardware.
@@ -94,8 +103,6 @@ struct mbox_cmd {
 #define CXL_MBOX_SUCCESS 0
 };
 
-static int cxl_mem_major;
-static DEFINE_IDA(cxl_memdev_ida);
 static DECLARE_RWSEM(cxl_memdev_rwsem);
 static struct dentry *cxl_debugfs;
 static bool cxl_raw_allow_all;
@@ -568,7 +575,7 @@ static bool cxl_mem_raw_command_allowed(u16 opcode)
        if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
                return false;
 
-       if (security_locked_down(LOCKDOWN_NONE))
+       if (security_locked_down(LOCKDOWN_PCI_ACCESS))
                return false;
 
        if (cxl_raw_allow_all)
@@ -806,13 +813,25 @@ static int cxl_memdev_release_file(struct inode *inode, struct file *file)
        return 0;
 }
 
-static const struct file_operations cxl_memdev_fops = {
-       .owner = THIS_MODULE,
-       .unlocked_ioctl = cxl_memdev_ioctl,
-       .open = cxl_memdev_open,
-       .release = cxl_memdev_release_file,
-       .compat_ioctl = compat_ptr_ioctl,
-       .llseek = noop_llseek,
+static void cxl_memdev_shutdown(struct device *dev)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+       down_write(&cxl_memdev_rwsem);
+       cxlmd->cxlm = NULL;
+       up_write(&cxl_memdev_rwsem);
+}
+
+static const struct cdevm_file_operations cxl_memdev_fops = {
+       .fops = {
+               .owner = THIS_MODULE,
+               .unlocked_ioctl = cxl_memdev_ioctl,
+               .open = cxl_memdev_open,
+               .release = cxl_memdev_release_file,
+               .compat_ioctl = compat_ptr_ioctl,
+               .llseek = noop_llseek,
+       },
+       .shutdown = cxl_memdev_shutdown,
 };
 
 static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
@@ -1022,8 +1041,8 @@ static int cxl_probe_regs(struct cxl_mem *cxlm, void __iomem *base,
                    !dev_map->memdev.valid) {
                        dev_err(dev, "registers not found: %s%s%s\n",
                                !dev_map->status.valid ? "status " : "",
-                               !dev_map->mbox.valid ? "status " : "",
-                               !dev_map->memdev.valid ? "status " : "");
+                               !dev_map->mbox.valid ? "mbox " : "",
+                               !dev_map->memdev.valid ? "memdev " : "");
                        return -ENXIO;
                }
 
@@ -1081,9 +1100,8 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
        struct device *dev = &pdev->dev;
        u32 regloc_size, regblocks;
        void __iomem *base;
-       int regloc, i;
-       struct cxl_register_map *map, *n;
-       LIST_HEAD(register_maps);
+       int regloc, i, n_maps;
+       struct cxl_register_map *map, maps[CXL_REGLOC_RBI_TYPES];
        int ret = 0;
 
        regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
@@ -1102,20 +1120,12 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
        regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
        regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
 
-       for (i = 0; i < regblocks; i++, regloc += 8) {
+       for (i = 0, n_maps = 0; i < regblocks; i++, regloc += 8) {
                u32 reg_lo, reg_hi;
                u8 reg_type;
                u64 offset;
                u8 bar;
 
-               map = kzalloc(sizeof(*map), GFP_KERNEL);
-               if (!map) {
-                       ret = -ENOMEM;
-                       goto free_maps;
-               }
-
-               list_add(&map->list, &register_maps);
-
                pci_read_config_dword(pdev, regloc, &reg_lo);
                pci_read_config_dword(pdev, regloc + 4, &reg_hi);
 
@@ -1125,12 +1135,15 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
                dev_dbg(dev, "Found register block in bar %u @ 0x%llx of type %u\n",
                        bar, offset, reg_type);
 
+               /* Ignore unknown register block types */
+               if (reg_type > CXL_REGLOC_RBI_MEMDEV)
+                       continue;
+
                base = cxl_mem_map_regblock(cxlm, bar, offset);
-               if (!base) {
-                       ret = -ENOMEM;
-                       goto free_maps;
-               }
+               if (!base)
+                       return -ENOMEM;
 
+               map = &maps[n_maps];
                map->barno = bar;
                map->block_offset = offset;
                map->reg_type = reg_type;
@@ -1141,240 +1154,22 @@ static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
                cxl_mem_unmap_regblock(cxlm, base);
 
                if (ret)
-                       goto free_maps;
+                       return ret;
+
+               n_maps++;
        }
 
        pci_release_mem_regions(pdev);
 
-       list_for_each_entry(map, &register_maps, list) {
-               ret = cxl_map_regs(cxlm, map);
+       for (i = 0; i < n_maps; i++) {
+               ret = cxl_map_regs(cxlm, &maps[i]);
                if (ret)
-                       goto free_maps;
-       }
-
-free_maps:
-       list_for_each_entry_safe(map, n, &register_maps, list) {
-               list_del(&map->list);
-               kfree(map);
+                       break;
        }
 
        return ret;
 }
 
-static struct cxl_memdev *to_cxl_memdev(struct device *dev)
-{
-       return container_of(dev, struct cxl_memdev, dev);
-}
-
-static void cxl_memdev_release(struct device *dev)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-
-       ida_free(&cxl_memdev_ida, cxlmd->id);
-       kfree(cxlmd);
-}
-
-static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
-                               kgid_t *gid)
-{
-       return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
-}
-
-static ssize_t firmware_version_show(struct device *dev,
-                                    struct device_attribute *attr, char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_mem *cxlm = cxlmd->cxlm;
-
-       return sysfs_emit(buf, "%.16s\n", cxlm->firmware_version);
-}
-static DEVICE_ATTR_RO(firmware_version);
-
-static ssize_t payload_max_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_mem *cxlm = cxlmd->cxlm;
-
-       return sysfs_emit(buf, "%zu\n", cxlm->payload_size);
-}
-static DEVICE_ATTR_RO(payload_max);
-
-static ssize_t label_storage_size_show(struct device *dev,
-                               struct device_attribute *attr, char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_mem *cxlm = cxlmd->cxlm;
-
-       return sysfs_emit(buf, "%zu\n", cxlm->lsa_size);
-}
-static DEVICE_ATTR_RO(label_storage_size);
-
-static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
-                            char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_mem *cxlm = cxlmd->cxlm;
-       unsigned long long len = range_len(&cxlm->ram_range);
-
-       return sysfs_emit(buf, "%#llx\n", len);
-}
-
-static struct device_attribute dev_attr_ram_size =
-       __ATTR(size, 0444, ram_size_show, NULL);
-
-static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
-                             char *buf)
-{
-       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-       struct cxl_mem *cxlm = cxlmd->cxlm;
-       unsigned long long len = range_len(&cxlm->pmem_range);
-
-       return sysfs_emit(buf, "%#llx\n", len);
-}
-
-static struct device_attribute dev_attr_pmem_size =
-       __ATTR(size, 0444, pmem_size_show, NULL);
-
-static struct attribute *cxl_memdev_attributes[] = {
-       &dev_attr_firmware_version.attr,
-       &dev_attr_payload_max.attr,
-       &dev_attr_label_storage_size.attr,
-       NULL,
-};
-
-static struct attribute *cxl_memdev_pmem_attributes[] = {
-       &dev_attr_pmem_size.attr,
-       NULL,
-};
-
-static struct attribute *cxl_memdev_ram_attributes[] = {
-       &dev_attr_ram_size.attr,
-       NULL,
-};
-
-static struct attribute_group cxl_memdev_attribute_group = {
-       .attrs = cxl_memdev_attributes,
-};
-
-static struct attribute_group cxl_memdev_ram_attribute_group = {
-       .name = "ram",
-       .attrs = cxl_memdev_ram_attributes,
-};
-
-static struct attribute_group cxl_memdev_pmem_attribute_group = {
-       .name = "pmem",
-       .attrs = cxl_memdev_pmem_attributes,
-};
-
-static const struct attribute_group *cxl_memdev_attribute_groups[] = {
-       &cxl_memdev_attribute_group,
-       &cxl_memdev_ram_attribute_group,
-       &cxl_memdev_pmem_attribute_group,
-       NULL,
-};
-
-static const struct device_type cxl_memdev_type = {
-       .name = "cxl_memdev",
-       .release = cxl_memdev_release,
-       .devnode = cxl_memdev_devnode,
-       .groups = cxl_memdev_attribute_groups,
-};
-
-static void cxl_memdev_shutdown(struct cxl_memdev *cxlmd)
-{
-       down_write(&cxl_memdev_rwsem);
-       cxlmd->cxlm = NULL;
-       up_write(&cxl_memdev_rwsem);
-}
-
-static void cxl_memdev_unregister(void *_cxlmd)
-{
-       struct cxl_memdev *cxlmd = _cxlmd;
-       struct device *dev = &cxlmd->dev;
-
-       cdev_device_del(&cxlmd->cdev, dev);
-       cxl_memdev_shutdown(cxlmd);
-       put_device(dev);
-}
-
-static struct cxl_memdev *cxl_memdev_alloc(struct cxl_mem *cxlm)
-{
-       struct pci_dev *pdev = cxlm->pdev;
-       struct cxl_memdev *cxlmd;
-       struct device *dev;
-       struct cdev *cdev;
-       int rc;
-
-       cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
-       if (!cxlmd)
-               return ERR_PTR(-ENOMEM);
-
-       rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
-       if (rc < 0)
-               goto err;
-       cxlmd->id = rc;
-
-       dev = &cxlmd->dev;
-       device_initialize(dev);
-       dev->parent = &pdev->dev;
-       dev->bus = &cxl_bus_type;
-       dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
-       dev->type = &cxl_memdev_type;
-       device_set_pm_not_required(dev);
-
-       cdev = &cxlmd->cdev;
-       cdev_init(cdev, &cxl_memdev_fops);
-       return cxlmd;
-
-err:
-       kfree(cxlmd);
-       return ERR_PTR(rc);
-}
-
-static struct cxl_memdev *devm_cxl_add_memdev(struct device *host,
-                                             struct cxl_mem *cxlm)
-{
-       struct cxl_memdev *cxlmd;
-       struct device *dev;
-       struct cdev *cdev;
-       int rc;
-
-       cxlmd = cxl_memdev_alloc(cxlm);
-       if (IS_ERR(cxlmd))
-               return cxlmd;
-
-       dev = &cxlmd->dev;
-       rc = dev_set_name(dev, "mem%d", cxlmd->id);
-       if (rc)
-               goto err;
-
-       /*
-        * Activate ioctl operations, no cxl_memdev_rwsem manipulation
-        * needed as this is ordered with cdev_add() publishing the device.
-        */
-       cxlmd->cxlm = cxlm;
-
-       cdev = &cxlmd->cdev;
-       rc = cdev_device_add(cdev, dev);
-       if (rc)
-               goto err;
-
-       rc = devm_add_action_or_reset(host, cxl_memdev_unregister, cxlmd);
-       if (rc)
-               return ERR_PTR(rc);
-       return cxlmd;
-
-err:
-       /*
-        * The cdev was briefly live, shutdown any ioctl operations that
-        * saw that state.
-        */
-       cxl_memdev_shutdown(cxlmd);
-       put_device(dev);
-       return ERR_PTR(rc);
-}
-
 static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
 {
        u32 remaining = size;
@@ -1468,6 +1263,53 @@ static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
        return ret;
 }
 
+/**
+ * cxl_mem_get_partition_info - Get partition info
+ * @cxlm: The device to act on
+ * @active_volatile_bytes: returned active volatile capacity
+ * @active_persistent_bytes: returned active persistent capacity
+ * @next_volatile_bytes: returned next volatile capacity
+ * @next_persistent_bytes: returned next persistent capacity
+ *
+ * Retrieve the current partition info for the specified device. If non-zero,
+ * the 'next' values are pending and take effect on the next cold reset.
+ *
+ * Return: 0 if no error, or the result of the mailbox command.
+ *
+ * See CXL 2.0 8.2.9.5.2.1 Get Partition Info
+ */
+static int cxl_mem_get_partition_info(struct cxl_mem *cxlm,
+                                     u64 *active_volatile_bytes,
+                                     u64 *active_persistent_bytes,
+                                     u64 *next_volatile_bytes,
+                                     u64 *next_persistent_bytes)
+{
+       struct cxl_mbox_get_partition_info {
+               __le64 active_volatile_cap;
+               __le64 active_persistent_cap;
+               __le64 next_volatile_cap;
+               __le64 next_persistent_cap;
+       } __packed pi;
+       int rc;
+
+       rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_PARTITION_INFO,
+                                  NULL, 0, &pi, sizeof(pi));
+       if (rc)
+               return rc;
+
+       *active_volatile_bytes = le64_to_cpu(pi.active_volatile_cap);
+       *active_persistent_bytes = le64_to_cpu(pi.active_persistent_cap);
+       *next_volatile_bytes = le64_to_cpu(pi.next_volatile_cap);
+       *next_persistent_bytes = le64_to_cpu(pi.next_persistent_cap);
+
+       *active_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
+       *active_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
+       *next_volatile_bytes *= CXL_CAPACITY_MULTIPLIER;
+       *next_persistent_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+       return 0;
+}
+
 /**
  * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
  * @cxlm: The device.
@@ -1564,16 +1406,27 @@ static int cxl_mem_identify(struct cxl_mem *cxlm)
        if (rc < 0)
                return rc;
 
-       /*
-        * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
-        * For now, only the capacity is exported in sysfs
-        */
-       cxlm->ram_range.start = 0;
-       cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) * SZ_256M - 1;
+       cxlm->total_bytes = le64_to_cpu(id.total_capacity);
+       cxlm->total_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+       cxlm->volatile_only_bytes = le64_to_cpu(id.volatile_capacity);
+       cxlm->volatile_only_bytes *= CXL_CAPACITY_MULTIPLIER;
 
-       cxlm->pmem_range.start = 0;
-       cxlm->pmem_range.end =
-               le64_to_cpu(id.persistent_capacity) * SZ_256M - 1;
+       cxlm->persistent_only_bytes = le64_to_cpu(id.persistent_capacity);
+       cxlm->persistent_only_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+       cxlm->partition_align_bytes = le64_to_cpu(id.partition_align);
+       cxlm->partition_align_bytes *= CXL_CAPACITY_MULTIPLIER;
+
+       dev_dbg(&cxlm->pdev->dev, "Identify Memory Device\n"
+               "     total_bytes = %#llx\n"
+               "     volatile_only_bytes = %#llx\n"
+               "     persistent_only_bytes = %#llx\n"
+               "     partition_align_bytes = %#llx\n",
+                       cxlm->total_bytes,
+                       cxlm->volatile_only_bytes,
+                       cxlm->persistent_only_bytes,
+                       cxlm->partition_align_bytes);
 
        cxlm->lsa_size = le32_to_cpu(id.lsa_size);
        memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
@@ -1581,6 +1434,49 @@ static int cxl_mem_identify(struct cxl_mem *cxlm)
        return 0;
 }
 
+static int cxl_mem_create_range_info(struct cxl_mem *cxlm)
+{
+       int rc;
+
+       if (cxlm->partition_align_bytes == 0) {
+               cxlm->ram_range.start = 0;
+               cxlm->ram_range.end = cxlm->volatile_only_bytes - 1;
+               cxlm->pmem_range.start = cxlm->volatile_only_bytes;
+               cxlm->pmem_range.end = cxlm->volatile_only_bytes +
+                                       cxlm->persistent_only_bytes - 1;
+               return 0;
+       }
+
+       rc = cxl_mem_get_partition_info(cxlm,
+                                       &cxlm->active_volatile_bytes,
+                                       &cxlm->active_persistent_bytes,
+                                       &cxlm->next_volatile_bytes,
+                                       &cxlm->next_persistent_bytes);
+       if (rc < 0) {
+               dev_err(&cxlm->pdev->dev, "Failed to query partition information\n");
+               return rc;
+       }
+
+       dev_dbg(&cxlm->pdev->dev, "Get Partition Info\n"
+               "     active_volatile_bytes = %#llx\n"
+               "     active_persistent_bytes = %#llx\n"
+               "     next_volatile_bytes = %#llx\n"
+               "     next_persistent_bytes = %#llx\n",
+                       cxlm->active_volatile_bytes,
+                       cxlm->active_persistent_bytes,
+                       cxlm->next_volatile_bytes,
+                       cxlm->next_persistent_bytes);
+
+       cxlm->ram_range.start = 0;
+       cxlm->ram_range.end = cxlm->active_volatile_bytes - 1;
+
+       cxlm->pmem_range.start = cxlm->active_volatile_bytes;
+       cxlm->pmem_range.end = cxlm->active_volatile_bytes +
+                               cxlm->active_persistent_bytes - 1;
+
+       return 0;
+}
+
 static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        struct cxl_memdev *cxlmd;
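
To make the 256MB multiplier concrete: a device whose Identify payload reports volatile_capacity = 0x10 advertises 16 * 256MB = 4GiB of volatile-only capacity, so with partition_align_bytes == 0 the helper above yields ram_range = [0, 4GiB - 1] and pmem_range starting at 4GiB. A minimal arithmetic sketch, not part of this patch:

/* Illustrative arithmetic only, not part of this patch. */
u64 volatile_cap = 0x10;	/* from Identify, in 256MB units */
u64 volatile_bytes = volatile_cap * CXL_CAPACITY_MULTIPLIER; /* 4 GiB */
struct range ram = {
	.start = 0,
	.end = volatile_bytes - 1,	/* 0xffffffff */
};
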
@@ -1611,7 +1507,11 @@ static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (rc)
                return rc;
 
-       cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm);
+       rc = cxl_mem_create_range_info(cxlm);
+       if (rc)
+               return rc;
+
+       cxlmd = devm_cxl_add_memdev(&pdev->dev, cxlm, &cxl_memdev_fops);
        if (IS_ERR(cxlmd))
                return PTR_ERR(cxlmd);
 
@@ -1640,25 +1540,15 @@ static struct pci_driver cxl_mem_driver = {
 static __init int cxl_mem_init(void)
 {
        struct dentry *mbox_debugfs;
-       dev_t devt;
        int rc;
 
        /* Double check the anonymous union trickery in struct cxl_regs */
        BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
                     offsetof(struct cxl_regs, device_regs.memdev));
 
-       rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
-       if (rc)
-               return rc;
-
-       cxl_mem_major = MAJOR(devt);
-
        rc = pci_register_driver(&cxl_mem_driver);
-       if (rc) {
-               unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
-                                        CXL_MEM_MAX_DEVS);
+       if (rc)
                return rc;
-       }
 
        cxl_debugfs = debugfs_create_dir("cxl", NULL);
        mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
@@ -1672,7 +1562,6 @@ static __exit void cxl_mem_exit(void)
 {
        debugfs_remove_recursive(cxl_debugfs);
        pci_unregister_driver(&cxl_mem_driver);
-       unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
 }
 
 MODULE_LICENSE("GPL v2");
index dad7a83..8c1a588 100644 (file)
@@ -25,6 +25,7 @@
 #define CXL_REGLOC_RBI_COMPONENT 1
 #define CXL_REGLOC_RBI_VIRT 2
 #define CXL_REGLOC_RBI_MEMDEV 3
+#define CXL_REGLOC_RBI_TYPES (CXL_REGLOC_RBI_MEMDEV + 1)
 
 #define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
 
index 0088e41..9652c3e 100644 (file)
@@ -6,7 +6,7 @@
 #include <linux/ndctl.h>
 #include <linux/async.h>
 #include <linux/slab.h>
-#include "mem.h"
+#include "cxlmem.h"
 #include "cxl.h"
 
 /*
index 44736cb..fc89e91 100644 (file)
 #include <linux/fs.h>
 #include "dax-private.h"
 
+/**
+ * struct dax_device - anchor object for dax services
+ * @inode: core vfs
+ * @cdev: optional character interface for "device dax"
+ * @host: optional name for lookups where the device path is not available
+ * @private: dax driver private data
+ * @flags: state and boolean properties
+ */
+struct dax_device {
+       struct hlist_node list;
+       struct inode inode;
+       struct cdev cdev;
+       const char *host;
+       void *private;
+       unsigned long flags;
+       const struct dax_operations *ops;
+};
+
 static dev_t dax_devt;
 DEFINE_STATIC_SRCU(dax_srcu);
 static struct vfsmount *dax_mnt;
@@ -40,6 +58,42 @@ void dax_read_unlock(int id)
 }
 EXPORT_SYMBOL_GPL(dax_read_unlock);
 
+static int dax_host_hash(const char *host)
+{
+       return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
+}
+
+/**
+ * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
+ * @host: alternate name for the device registered by a dax driver
+ */
+static struct dax_device *dax_get_by_host(const char *host)
+{
+       struct dax_device *dax_dev, *found = NULL;
+       int hash, id;
+
+       if (!host)
+               return NULL;
+
+       hash = dax_host_hash(host);
+
+       id = dax_read_lock();
+       spin_lock(&dax_host_lock);
+       hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
+               if (!dax_alive(dax_dev)
+                               || strcmp(host, dax_dev->host) != 0)
+                       continue;
+
+               if (igrab(&dax_dev->inode))
+                       found = dax_dev;
+               break;
+       }
+       spin_unlock(&dax_host_lock);
+       dax_read_unlock(id);
+
+       return found;
+}
+
 #ifdef CONFIG_BLOCK
 #include <linux/blkdev.h>
 
@@ -65,15 +119,13 @@ struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
        return dax_get_by_host(bdev->bd_disk->disk_name);
 }
 EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev);
-#endif
 
-bool __generic_fsdax_supported(struct dax_device *dax_dev,
+bool generic_fsdax_supported(struct dax_device *dax_dev,
                struct block_device *bdev, int blocksize, sector_t start,
                sector_t sectors)
 {
        bool dax_enabled = false;
        pgoff_t pgoff, pgoff_end;
-       char buf[BDEVNAME_SIZE];
        void *kaddr, *end_kaddr;
        pfn_t pfn, end_pfn;
        sector_t last_page;
@@ -81,29 +133,25 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
        int err, id;
 
        if (blocksize != PAGE_SIZE) {
-               pr_info("%s: error: unsupported blocksize for dax\n",
-                               bdevname(bdev, buf));
+               pr_info("%pg: error: unsupported blocksize for dax\n", bdev);
                return false;
        }
 
        if (!dax_dev) {
-               pr_debug("%s: error: dax unsupported by block device\n",
-                               bdevname(bdev, buf));
+               pr_debug("%pg: error: dax unsupported by block device\n", bdev);
                return false;
        }
 
        err = bdev_dax_pgoff(bdev, start, PAGE_SIZE, &pgoff);
        if (err) {
-               pr_info("%s: error: unaligned partition for dax\n",
-                               bdevname(bdev, buf));
+               pr_info("%pg: error: unaligned partition for dax\n", bdev);
                return false;
        }
 
        last_page = PFN_DOWN((start + sectors - 1) * 512) * PAGE_SIZE / 512;
        err = bdev_dax_pgoff(bdev, last_page, PAGE_SIZE, &pgoff_end);
        if (err) {
-               pr_info("%s: error: unaligned partition for dax\n",
-                               bdevname(bdev, buf));
+               pr_info("%pg: error: unaligned partition for dax\n", bdev);
                return false;
        }
 
@@ -112,8 +160,8 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
        len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn);
 
        if (len < 1 || len2 < 1) {
-               pr_info("%s: error: dax access failed (%ld)\n",
-                               bdevname(bdev, buf), len < 1 ? len : len2);
+               pr_info("%pg: error: dax access failed (%ld)\n",
+                               bdev, len < 1 ? len : len2);
                dax_read_unlock(id);
                return false;
        }
@@ -147,57 +195,32 @@ bool __generic_fsdax_supported(struct dax_device *dax_dev,
        dax_read_unlock(id);
 
        if (!dax_enabled) {
-               pr_info("%s: error: dax support not enabled\n",
-                               bdevname(bdev, buf));
+               pr_info("%pg: error: dax support not enabled\n", bdev);
                return false;
        }
        return true;
 }
-EXPORT_SYMBOL_GPL(__generic_fsdax_supported);
+EXPORT_SYMBOL_GPL(generic_fsdax_supported);
 
-/**
- * __bdev_dax_supported() - Check if the device supports dax for filesystem
- * @bdev: block device to check
- * @blocksize: The block size of the device
- *
- * This is a library function for filesystems to check if the block device
- * can be mounted with dax option.
- *
- * Return: true if supported, false if unsupported
- */
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize)
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+               int blocksize, sector_t start, sector_t len)
 {
-       struct dax_device *dax_dev;
-       struct request_queue *q;
-       char buf[BDEVNAME_SIZE];
-       bool ret;
+       bool ret = false;
        int id;
 
-       q = bdev_get_queue(bdev);
-       if (!q || !blk_queue_dax(q)) {
-               pr_debug("%s: error: request queue doesn't support dax\n",
-                               bdevname(bdev, buf));
-               return false;
-       }
-
-       dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
-       if (!dax_dev) {
-               pr_debug("%s: error: device does not support dax\n",
-                               bdevname(bdev, buf));
+       if (!dax_dev)
                return false;
-       }
 
        id = dax_read_lock();
-       ret = dax_supported(dax_dev, bdev, blocksize, 0,
-                       i_size_read(bdev->bd_inode) / 512);
+       if (dax_alive(dax_dev) && dax_dev->ops->dax_supported)
+               ret = dax_dev->ops->dax_supported(dax_dev, bdev, blocksize,
+                                                 start, len);
        dax_read_unlock(id);
-
-       put_dax(dax_dev);
-
        return ret;
 }
-EXPORT_SYMBOL_GPL(__bdev_dax_supported);
-#endif
+EXPORT_SYMBOL_GPL(dax_supported);
+#endif /* CONFIG_FS_DAX */
+#endif /* CONFIG_BLOCK */
 
 enum dax_device_flags {
        /* !alive + rcu grace period == no new operations / mappings */
@@ -208,24 +231,6 @@ enum dax_device_flags {
        DAXDEV_SYNC,
 };
 
-/**
- * struct dax_device - anchor object for dax services
- * @inode: core vfs
- * @cdev: optional character interface for "device dax"
- * @host: optional name for lookups where the device path is not available
- * @private: dax driver private data
- * @flags: state and boolean properties
- */
-struct dax_device {
-       struct hlist_node list;
-       struct inode inode;
-       struct cdev cdev;
-       const char *host;
-       void *private;
-       unsigned long flags;
-       const struct dax_operations *ops;
-};
-
 static ssize_t write_cache_show(struct device *dev,
                struct device_attribute *attr, char *buf)
 {
@@ -323,19 +328,6 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 }
 EXPORT_SYMBOL_GPL(dax_direct_access);
 
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-               int blocksize, sector_t start, sector_t len)
-{
-       if (!dax_dev)
-               return false;
-
-       if (!dax_alive(dax_dev))
-               return false;
-
-       return dax_dev->ops->dax_supported(dax_dev, bdev, blocksize, start, len);
-}
-EXPORT_SYMBOL_GPL(dax_supported);
-
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
                size_t bytes, struct iov_iter *i)
 {
@@ -423,11 +415,6 @@ bool dax_alive(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(dax_alive);
 
-static int dax_host_hash(const char *host)
-{
-       return hashlen_hash(hashlen_string("DAX", host)) % DAX_HASH_SIZE;
-}
-
 /*
  * Note, rcu is not protecting the liveness of dax_dev, rcu is ensuring
  * that any fault handlers or operations that might have seen
@@ -624,38 +611,6 @@ void put_dax(struct dax_device *dax_dev)
 }
 EXPORT_SYMBOL_GPL(put_dax);
 
-/**
- * dax_get_by_host() - temporary lookup mechanism for filesystem-dax
- * @host: alternate name for the device registered by a dax driver
- */
-struct dax_device *dax_get_by_host(const char *host)
-{
-       struct dax_device *dax_dev, *found = NULL;
-       int hash, id;
-
-       if (!host)
-               return NULL;
-
-       hash = dax_host_hash(host);
-
-       id = dax_read_lock();
-       spin_lock(&dax_host_lock);
-       hlist_for_each_entry(dax_dev, &dax_host_list[hash], list) {
-               if (!dax_alive(dax_dev)
-                               || strcmp(host, dax_dev->host) != 0)
-                       continue;
-
-               if (igrab(&dax_dev->inode))
-                       found = dax_dev;
-               break;
-       }
-       spin_unlock(&dax_host_lock);
-       dax_read_unlock(id);
-
-       return found;
-}
-EXPORT_SYMBOL_GPL(dax_get_by_host);
-
 /**
  * inode_dax: convert a public inode into its dax_dev
  * @inode: An inode with i_cdev pointing to a dax_dev
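
With the host lookup and the support check now consolidated in this file, a filesystem mount path would roughly do the following; error handling is elided and the helper name is illustrative, not part of this patch.

/* Illustrative sketch only, not part of this patch. */
static bool example_bdev_supports_dax(struct block_device *bdev, int blocksize)
{
	struct dax_device *dax_dev = fs_dax_get_by_bdev(bdev);
	bool ret;

	/* dax_supported() tolerates a NULL dax_dev and returns false. */
	ret = dax_supported(dax_dev, bdev, blocksize, 0,
			    i_size_read(bdev->bd_inode) / 512);
	put_dax(dax_dev);
	return ret;
}
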
index 9561e3d..541efe0 100644 (file)
@@ -42,6 +42,7 @@ config UDMABUF
 config DMABUF_MOVE_NOTIFY
        bool "Move notify between drivers (EXPERIMENTAL)"
        default n
+       depends on DMA_SHARED_BUFFER
        help
          Don't pin buffers if the dynamic DMA-buf interface is available on
          both the exporter as well as the importer. This fixes a security
@@ -52,6 +53,7 @@ config DMABUF_MOVE_NOTIFY
 
 config DMABUF_DEBUG
        bool "DMA-BUF debug checks"
+       depends on DMA_SHARED_BUFFER
        default y if DMA_API_DEBUG
        help
          This option enables additional checks for DMA-BUF importers and
@@ -74,7 +76,7 @@ menuconfig DMABUF_HEAPS
 
 menuconfig DMABUF_SYSFS_STATS
        bool "DMA-BUF sysfs statistics"
-       select DMA_SHARED_BUFFER
+       depends on DMA_SHARED_BUFFER
        help
           Choose this option to enable DMA-BUF sysfs statistics
           in location /sys/kernel/dmabuf/buffers.
index 39b5b46..80c2c03 100644 (file)
@@ -277,10 +277,15 @@ config INTEL_IDMA64
          Enable DMA support for Intel Low Power Subsystem such as found on
          Intel Skylake PCH.
 
+config INTEL_IDXD_BUS
+       tristate
+       default INTEL_IDXD
+
 config INTEL_IDXD
        tristate "Intel Data Accelerators support"
-       depends on PCI && X86_64
+       depends on PCI && X86_64 && !UML
        depends on PCI_MSI
+       depends on PCI_PASID
        depends on SBITMAP
        select DMA_ENGINE
        help
@@ -291,6 +296,23 @@ config INTEL_IDXD
 
          If unsure, say N.
 
+config INTEL_IDXD_COMPAT
+       bool "Legacy behavior for idxd driver"
+       depends on PCI && X86_64
+       select INTEL_IDXD_BUS
+       help
+         Compatible driver to support old /sys/bus/dsa/drivers/dsa behavior.
+         The old behavior performed driver bind/unbind for device and wq
+         devices all under the dsa driver. The compat driver will emulate
+         the legacy behavior in order to allow existing support apps (i.e.
+         accel-config) to continue to function. It is expected that accel-config
+         v3.2 and earlier will need the compat mode. A distro with a later
+         accel-config version can disable this compat config.
+
+         Say Y if you have old applications that require such behavior.
+
+         If unsure, say N.
+
 # Config symbol that collects all the dependencies that's necessary to
 # support shared virtual memory for the devices supported by idxd.
 config INTEL_IDXD_SVM
@@ -315,7 +337,7 @@ config INTEL_IDXD_PERFMON
 
 config INTEL_IOATDMA
        tristate "Intel I/OAT DMA support"
-       depends on PCI && X86_64
+       depends on PCI && X86_64 && !UML
        select DMA_ENGINE
        select DMA_ENGINE_RAID
        select DCA
@@ -716,6 +738,8 @@ source "drivers/dma/bestcomm/Kconfig"
 
 source "drivers/dma/mediatek/Kconfig"
 
+source "drivers/dma/ptdma/Kconfig"
+
 source "drivers/dma/qcom/Kconfig"
 
 source "drivers/dma/dw/Kconfig"
index aa69094..616d926 100644 (file)
@@ -16,6 +16,7 @@ obj-$(CONFIG_DMATEST) += dmatest.o
 obj-$(CONFIG_ALTERA_MSGDMA) += altera-msgdma.o
 obj-$(CONFIG_AMBA_PL08X) += amba-pl08x.o
 obj-$(CONFIG_AMCC_PPC440SPE_ADMA) += ppc4xx/
+obj-$(CONFIG_AMD_PTDMA) += ptdma/
 obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
 obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
@@ -41,7 +42,7 @@ obj-$(CONFIG_IMX_DMA) += imx-dma.o
 obj-$(CONFIG_IMX_SDMA) += imx-sdma.o
 obj-$(CONFIG_INTEL_IDMA64) += idma64.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioat/
-obj-$(CONFIG_INTEL_IDXD) += idxd/
+obj-y += idxd/
 obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
 obj-$(CONFIG_K3_DMA) += k3dma.o
 obj-$(CONFIG_LPC18XX_DMAMUX) += lpc18xx-dmamux.o
index 235f139..5906eae 100644 (file)
@@ -70,10 +70,22 @@ static int acpi_dma_parse_resource_group(const struct acpi_csrt_group *grp,
 
        si = (const struct acpi_csrt_shared_info *)&grp[1];
 
-       /* Match device by MMIO and IRQ */
+       /* Match device by MMIO */
        if (si->mmio_base_low != lower_32_bits(mem) ||
-           si->mmio_base_high != upper_32_bits(mem) ||
-           si->gsi_interrupt != irq)
+           si->mmio_base_high != upper_32_bits(mem))
+               return 0;
+
+       /*
+        * acpi_gsi_to_irq() can't be used because some platforms do not save
+        * registered IRQs in the MP table. Instead we just try to register
+        * the GSI, which is the core part of the above-mentioned function.
+        */
+       ret = acpi_register_gsi(NULL, si->gsi_interrupt, si->interrupt_mode, si->interrupt_polarity);
+       if (ret < 0)
+               return 0;
+
+       /* Match device by Linux vIRQ */
+       if (ret != irq)
                return 0;
 
        dev_dbg(&adev->dev, "matches with %.4s%04X (rev %u)\n",
index 0fe0676..5a2c757 100644 (file)
@@ -691,10 +691,14 @@ static void msgdma_tasklet(struct tasklet_struct *t)
 
        spin_lock_irqsave(&mdev->lock, flags);
 
-       /* Read number of responses that are available */
-       count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
-       dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
-               __func__, __LINE__, count);
+       if (mdev->resp) {
+               /* Read number of responses that are available */
+               count = ioread32(mdev->csr + MSGDMA_CSR_RESP_FILL_LEVEL);
+               dev_dbg(mdev->dev, "%s (%d): response count=%d\n",
+                       __func__, __LINE__, count);
+       } else {
+               count = 1;
+       }
 
        while (count--) {
                /*
@@ -703,8 +707,12 @@ static void msgdma_tasklet(struct tasklet_struct *t)
                 * have any real values, like transferred bytes or error
                 * bits. So we need to just drop these values.
                 */
-               size = ioread32(mdev->resp + MSGDMA_RESP_BYTES_TRANSFERRED);
-               status = ioread32(mdev->resp + MSGDMA_RESP_STATUS);
+               if (mdev->resp) {
+                       size = ioread32(mdev->resp +
+                                       MSGDMA_RESP_BYTES_TRANSFERRED);
+                       status = ioread32(mdev->resp +
+                                       MSGDMA_RESP_STATUS);
+               }
 
                msgdma_complete_descriptor(mdev);
                msgdma_chan_desc_cleanup(mdev);
@@ -757,14 +765,21 @@ static void msgdma_dev_remove(struct msgdma_device *mdev)
 }
 
 static int request_and_map(struct platform_device *pdev, const char *name,
-                          struct resource **res, void __iomem **ptr)
+                          struct resource **res, void __iomem **ptr,
+                          bool optional)
 {
        struct resource *region;
        struct device *device = &pdev->dev;
 
        *res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
        if (*res == NULL) {
-               dev_err(device, "resource %s not defined\n", name);
+               if (optional) {
+                       *ptr = NULL;
+                       dev_info(device, "optional resource %s not defined\n",
+                                name);
+                       return 0;
+               }
+               dev_err(device, "mandatory resource %s not defined\n", name);
                return -ENODEV;
        }
 
@@ -805,17 +820,17 @@ static int msgdma_probe(struct platform_device *pdev)
        mdev->dev = &pdev->dev;
 
        /* Map CSR space */
-       ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr);
+       ret = request_and_map(pdev, "csr", &dma_res, &mdev->csr, false);
        if (ret)
                return ret;
 
        /* Map (extended) descriptor space */
-       ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc);
+       ret = request_and_map(pdev, "desc", &dma_res, &mdev->desc, false);
        if (ret)
                return ret;
 
        /* Map response space */
-       ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp);
+       ret = request_and_map(pdev, "resp", &dma_res, &mdev->resp, true);
        if (ret)
                return ret;
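
A condensed sketch of the optional-resource idiom request_and_map() now follows; only the lookup branch is shown, and the devm mapping step is assumed to mirror the existing code:

	*res = platform_get_resource_byname(pdev, IORESOURCE_MEM, name);
	if (*res == NULL) {
		if (optional) {
			*ptr = NULL;	/* callers test mdev->resp before touching it */
			return 0;	/* a missing optional region is not an error */
		}
		return -ENODEV;		/* mandatory regions must exist */
	}

With "resp" mapped this way, the mdev->resp checks in the tasklet above decide whether the response FIFO can actually be read or the count is simply forced to 1.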
 
index 64a52bf..ab78e0f 100644 (file)
@@ -2240,10 +2240,16 @@ static struct platform_driver at_xdmac_driver = {
 
 static int __init at_xdmac_init(void)
 {
-       return platform_driver_probe(&at_xdmac_driver, at_xdmac_probe);
+       return platform_driver_register(&at_xdmac_driver);
 }
 subsys_initcall(at_xdmac_init);
 
+static void __exit at_xdmac_exit(void)
+{
+       platform_driver_unregister(&at_xdmac_driver);
+}
+module_exit(at_xdmac_exit);
+
 MODULE_DESCRIPTION("Atmel Extended DMA Controller driver");
 MODULE_AUTHOR("Ludovic Desroches <ludovic.desroches@atmel.com>");
 MODULE_LICENSE("GPL");
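
platform_driver_probe() gives up for good if the device is not ready when the initcall runs and is documented as incompatible with deferred probing, so switching to platform_driver_register() (paired with platform_driver_unregister() in the new module_exit handler) presumably lets at_xdmac retry once its resources, such as clocks, become available.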
index d9e4ac3..35993ab 100644 (file)
@@ -363,12 +363,16 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
                        DWAXIDMAC_TT_FC_MEM_TO_PER_DST :
                        DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC)
                        << CH_CFG_H_TT_FC_POS;
+               if (chan->chip->apb_regs)
+                       reg |= (chan->id << CH_CFG_H_DST_PER_POS);
                break;
        case DMA_DEV_TO_MEM:
                reg |= (chan->config.device_fc ?
                        DWAXIDMAC_TT_FC_PER_TO_MEM_SRC :
                        DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC)
                        << CH_CFG_H_TT_FC_POS;
+               if (chan->chip->apb_regs)
+                       reg |= (chan->id << CH_CFG_H_SRC_PER_POS);
                break;
        default:
                break;
@@ -470,18 +474,13 @@ static void dma_chan_free_chan_resources(struct dma_chan *dchan)
        pm_runtime_put(chan->chip->dev);
 }
 
-static void dw_axi_dma_set_hw_channel(struct axi_dma_chip *chip,
-                                     u32 handshake_num, bool set)
+static void dw_axi_dma_set_hw_channel(struct axi_dma_chan *chan, bool set)
 {
-       unsigned long start = 0;
-       unsigned long reg_value;
-       unsigned long reg_mask;
-       unsigned long reg_set;
-       unsigned long mask;
-       unsigned long val;
+       struct axi_dma_chip *chip = chan->chip;
+       unsigned long reg_value, val;
 
        if (!chip->apb_regs) {
-               dev_dbg(chip->dev, "apb_regs not initialized\n");
+               dev_err(chip->dev, "apb_regs not initialized\n");
                return;
        }
 
@@ -490,26 +489,22 @@ static void dw_axi_dma_set_hw_channel(struct axi_dma_chip *chip,
        * Lock the DMA channel by assigning a handshake number to the channel.
        * Unlock the DMA channel by assigning 0x3F to the channel.
         */
-       if (set) {
-               reg_set = UNUSED_CHANNEL;
-               val = handshake_num;
-       } else {
-               reg_set = handshake_num;
+       if (set)
+               val = chan->hw_handshake_num;
+       else
                val = UNUSED_CHANNEL;
-       }
 
        reg_value = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
 
-       for_each_set_clump8(start, reg_mask, &reg_value, 64) {
-               if (reg_mask == reg_set) {
-                       mask = GENMASK_ULL(start + 7, start);
-                       reg_value &= ~mask;
-                       reg_value |= rol64(val, start);
-                       lo_hi_writeq(reg_value,
-                                    chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
-                       break;
-               }
-       }
+       /* Channel is already allocated; set the handshake per channel ID. */
+       /* A single 64-bit write updates the fields for all 8 channels. */
+
+       reg_value &= ~(DMA_APB_HS_SEL_MASK <<
+                       (chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+       reg_value |= (val << (chan->id * DMA_APB_HS_SEL_BIT_SIZE));
+       lo_hi_writeq(reg_value, chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+
+       return;
 }
 
 /*
@@ -742,7 +737,7 @@ dw_axi_dma_chan_prep_cyclic(struct dma_chan *dchan, dma_addr_t dma_addr,
                llp = hw_desc->llp;
        } while (total_segments);
 
-       dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+       dw_axi_dma_set_hw_channel(chan, true);
 
        return vchan_tx_prep(&chan->vc, &desc->vd, flags);
 
@@ -822,7 +817,7 @@ dw_axi_dma_chan_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
                llp = hw_desc->llp;
        } while (num_sgs);
 
-       dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+       dw_axi_dma_set_hw_channel(chan, true);
 
        return vchan_tx_prep(&chan->vc, &desc->vd, flags);
 
@@ -1098,8 +1093,7 @@ static int dma_chan_terminate_all(struct dma_chan *dchan)
                         "%s failed to stop\n", axi_chan_name(chan));
 
        if (chan->direction != DMA_MEM_TO_MEM)
-               dw_axi_dma_set_hw_channel(chan->chip,
-                                         chan->hw_handshake_num, false);
+               dw_axi_dma_set_hw_channel(chan, false);
        if (chan->direction == DMA_MEM_TO_DEV)
                dw_axi_dma_set_byte_halfword(chan, false);
 
@@ -1296,7 +1290,7 @@ static int parse_device_properties(struct axi_dma_chip *chip)
                        return -EINVAL;
 
                chip->dw->hdata->restrict_axi_burst_len = true;
-               chip->dw->hdata->axi_rw_burst_len = tmp - 1;
+               chip->dw->hdata->axi_rw_burst_len = tmp;
        }
 
        return 0;
@@ -1365,7 +1359,6 @@ static int dw_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-
        INIT_LIST_HEAD(&dw->dma.channels);
        for (i = 0; i < hdata->nr_channels; i++) {
                struct axi_dma_chan *chan = &dw->chan[i];
@@ -1386,6 +1379,7 @@ static int dw_probe(struct platform_device *pdev)
 
        /* DMA capabilities */
        dw->dma.chancnt = hdata->nr_channels;
+       dw->dma.max_burst = hdata->axi_rw_burst_len;
        dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
        dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
        dw->dma.directions = BIT(DMA_MEM_TO_MEM);
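
As a worked example of the fixed-layout update above (using the DMA_APB_HS_SEL_* constants added in the header hunk that follows): each channel owns an 8-bit field in DMAC_APB_HW_HS_SEL_0, so locking handshake 5 on channel 2 clears bits 23:16 and writes 0x05 there, while unlocking writes UNUSED_CHANNEL (0x3F). A minimal sketch of the read-modify-write, reusing chan/chip from dw_axi_dma_set_hw_channel():

	u64 reg = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
	unsigned int shift = chan->id * DMA_APB_HS_SEL_BIT_SIZE;	/* 8 bits per channel */

	reg &= ~((u64)DMA_APB_HS_SEL_MASK << shift);			/* clear this channel's byte */
	reg |= (u64)(set ? chan->hw_handshake_num : UNUSED_CHANNEL) << shift;
	lo_hi_writeq(reg, chip->apb_regs + DMAC_APB_HW_HS_SEL_0);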
index b698978..380005a 100644 (file)
@@ -184,6 +184,8 @@ static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
 #define DMAC_APB_HALFWORD_WR_CH_EN     0x020 /* DMAC Halfword write enables */
 
 #define UNUSED_CHANNEL         0x3F /* Set unused DMA channel to 0x3F */
+#define DMA_APB_HS_SEL_BIT_SIZE        0x08 /* HW handshake bits per channel */
+#define DMA_APB_HS_SEL_MASK    0xFF /* HW handshake select masks */
 #define MAX_BLOCK_SIZE         0x1000 /* 1024 blocks * 4 bytes data width */
 
 /* DMAC_CFG */
@@ -256,6 +258,8 @@ enum {
 
 /* CH_CFG_H */
 #define CH_CFG_H_PRIORITY_POS          17
+#define CH_CFG_H_DST_PER_POS           12
+#define CH_CFG_H_SRC_PER_POS           7
 #define CH_CFG_H_HS_SEL_DST_POS                4
 #define CH_CFG_H_HS_SEL_SRC_POS                3
 enum {
index 3ce44de..58f4078 100644 (file)
 // SPDX-License-Identifier: GPL-2.0
-// Copyright (C) 2013,2018 Intel Corporation
+// Copyright (C) 2013,2018,2020-2021 Intel Corporation
 
 #include <linux/bitops.h>
 #include <linux/dmaengine.h>
 #include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/pci.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 
 #include "internal.h"
 
-static void idma32_initialize_chan(struct dw_dma_chan *dwc)
+#define DMA_CTL_CH(x)                  (0x1000 + (x) * 4)
+#define DMA_SRC_ADDR_FILLIN(x)         (0x1100 + (x) * 4)
+#define DMA_DST_ADDR_FILLIN(x)         (0x1200 + (x) * 4)
+#define DMA_XBAR_SEL(x)                        (0x1300 + (x) * 4)
+#define DMA_REGACCESS_CHID_CFG         (0x1400)
+
+#define CTL_CH_TRANSFER_MODE_MASK      GENMASK(1, 0)
+#define CTL_CH_TRANSFER_MODE_S2S       0
+#define CTL_CH_TRANSFER_MODE_S2D       1
+#define CTL_CH_TRANSFER_MODE_D2S       2
+#define CTL_CH_TRANSFER_MODE_D2D       3
+#define CTL_CH_RD_RS_MASK              GENMASK(4, 3)
+#define CTL_CH_WR_RS_MASK              GENMASK(6, 5)
+#define CTL_CH_RD_NON_SNOOP_BIT                BIT(8)
+#define CTL_CH_WR_NON_SNOOP_BIT                BIT(9)
+
+#define XBAR_SEL_DEVID_MASK            GENMASK(15, 0)
+#define XBAR_SEL_RX_TX_BIT             BIT(16)
+#define XBAR_SEL_RX_TX_SHIFT           16
+
+#define REGACCESS_CHID_MASK            GENMASK(2, 0)
+
+static unsigned int idma32_get_slave_devfn(struct dw_dma_chan *dwc)
+{
+       struct device *slave = dwc->chan.slave;
+
+       if (!slave || !dev_is_pci(slave))
+               return 0;
+
+       return to_pci_dev(slave)->devfn;
+}
+
+static void idma32_initialize_chan_xbar(struct dw_dma_chan *dwc)
+{
+       struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+       void __iomem *misc = __dw_regs(dw);
+       u32 cfghi = 0, cfglo = 0;
+       u8 dst_id, src_id;
+       u32 value;
+
+       /* DMA Channel ID Configuration register must be programmed first */
+       value = readl(misc + DMA_REGACCESS_CHID_CFG);
+
+       value &= ~REGACCESS_CHID_MASK;
+       value |= dwc->chan.chan_id;
+
+       writel(value, misc + DMA_REGACCESS_CHID_CFG);
+
+       /* Configure channel attributes */
+       value = readl(misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+       value &= ~(CTL_CH_RD_NON_SNOOP_BIT | CTL_CH_WR_NON_SNOOP_BIT);
+       value &= ~(CTL_CH_RD_RS_MASK | CTL_CH_WR_RS_MASK);
+       value &= ~CTL_CH_TRANSFER_MODE_MASK;
+
+       switch (dwc->direction) {
+       case DMA_MEM_TO_DEV:
+               value |= CTL_CH_TRANSFER_MODE_D2S;
+               value |= CTL_CH_WR_NON_SNOOP_BIT;
+               break;
+       case DMA_DEV_TO_MEM:
+               value |= CTL_CH_TRANSFER_MODE_S2D;
+               value |= CTL_CH_RD_NON_SNOOP_BIT;
+               break;
+       default:
+               /*
+                * Memory-to-Memory and Device-to-Device are ignored for now.
+                *
+                * For Memory-to-Memory transfers we would need to set mode
+                * and disable snooping on both sides.
+                */
+               return;
+       }
+
+       writel(value, misc + DMA_CTL_CH(dwc->chan.chan_id));
+
+       /* Configure crossbar selection */
+       value = readl(misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+       /* DEVFN selection */
+       value &= ~XBAR_SEL_DEVID_MASK;
+       value |= idma32_get_slave_devfn(dwc);
+
+       switch (dwc->direction) {
+       case DMA_MEM_TO_DEV:
+               value |= XBAR_SEL_RX_TX_BIT;
+               break;
+       case DMA_DEV_TO_MEM:
+               value &= ~XBAR_SEL_RX_TX_BIT;
+               break;
+       default:
+               /* Memory-to-Memory and Device-to-Device are ignored for now */
+               return;
+       }
+
+       writel(value, misc + DMA_XBAR_SEL(dwc->chan.chan_id));
+
+       /* Configure DMA channel low and high registers */
+       switch (dwc->direction) {
+       case DMA_MEM_TO_DEV:
+               dst_id = dwc->chan.chan_id;
+               src_id = dwc->dws.src_id;
+               break;
+       case DMA_DEV_TO_MEM:
+               dst_id = dwc->dws.dst_id;
+               src_id = dwc->chan.chan_id;
+               break;
+       default:
+               /* Memory-to-Memory and Device-to-Device are ignored for now */
+               return;
+       }
+
+       /* Set default burst alignment */
+       cfglo |= IDMA32C_CFGL_DST_BURST_ALIGN | IDMA32C_CFGL_SRC_BURST_ALIGN;
+
+       /* Low 4 bits of the request lines */
+       cfghi |= IDMA32C_CFGH_DST_PER(dst_id & 0xf);
+       cfghi |= IDMA32C_CFGH_SRC_PER(src_id & 0xf);
+
+       /* Request line extension (2 bits) */
+       cfghi |= IDMA32C_CFGH_DST_PER_EXT(dst_id >> 4 & 0x3);
+       cfghi |= IDMA32C_CFGH_SRC_PER_EXT(src_id >> 4 & 0x3);
+
+       channel_writel(dwc, CFG_LO, cfglo);
+       channel_writel(dwc, CFG_HI, cfghi);
+}
+
+static void idma32_initialize_chan_generic(struct dw_dma_chan *dwc)
 {
        u32 cfghi = 0;
        u32 cfglo = 0;
@@ -134,7 +263,10 @@ int idma32_dma_probe(struct dw_dma_chip *chip)
                return -ENOMEM;
 
        /* Channel operations */
-       dw->initialize_chan = idma32_initialize_chan;
+       if (chip->pdata->quirks & DW_DMA_QUIRK_XBAR_PRESENT)
+               dw->initialize_chan = idma32_initialize_chan_xbar;
+       else
+               dw->initialize_chan = idma32_initialize_chan_generic;
        dw->suspend_chan = idma32_suspend_chan;
        dw->resume_chan = idma32_resume_chan;
        dw->prepare_ctllo = idma32_prepare_ctllo;
index 2e1c52e..563ce73 100644 (file)
@@ -74,4 +74,20 @@ static __maybe_unused const struct dw_dma_chip_pdata idma32_chip_pdata = {
        .remove = idma32_dma_remove,
 };
 
+static const struct dw_dma_platform_data xbar_pdata = {
+       .nr_channels = 8,
+       .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
+       .chan_priority = CHAN_PRIORITY_ASCENDING,
+       .block_size = 131071,
+       .nr_masters = 1,
+       .data_width = {4},
+       .quirks = DW_DMA_QUIRK_XBAR_PRESENT,
+};
+
+static __maybe_unused const struct dw_dma_chip_pdata xbar_chip_pdata = {
+       .pdata = &xbar_pdata,
+       .probe = idma32_dma_probe,
+       .remove = idma32_dma_remove,
+};
+
 #endif /* _DMA_DW_INTERNAL_H */
index c1cf767..523ca80 100644 (file)
@@ -50,15 +50,10 @@ struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
 {
        struct device_node *np = pdev->dev.of_node;
        struct dw_dma_platform_data *pdata;
-       u32 tmp, arr[DW_DMA_MAX_NR_MASTERS], mb[DW_DMA_MAX_NR_CHANNELS];
+       u32 tmp, arr[DW_DMA_MAX_NR_MASTERS];
        u32 nr_masters;
        u32 nr_channels;
 
-       if (!np) {
-               dev_err(&pdev->dev, "Missing DT data\n");
-               return NULL;
-       }
-
        if (of_property_read_u32(np, "dma-masters", &nr_masters))
                return NULL;
        if (nr_masters < 1 || nr_masters > DW_DMA_MAX_NR_MASTERS)
@@ -76,41 +71,29 @@ struct dw_dma_platform_data *dw_dma_parse_dt(struct platform_device *pdev)
        pdata->nr_masters = nr_masters;
        pdata->nr_channels = nr_channels;
 
-       if (!of_property_read_u32(np, "chan_allocation_order", &tmp))
-               pdata->chan_allocation_order = (unsigned char)tmp;
+       of_property_read_u32(np, "chan_allocation_order", &pdata->chan_allocation_order);
+       of_property_read_u32(np, "chan_priority", &pdata->chan_priority);
 
-       if (!of_property_read_u32(np, "chan_priority", &tmp))
-               pdata->chan_priority = tmp;
+       of_property_read_u32(np, "block_size", &pdata->block_size);
 
-       if (!of_property_read_u32(np, "block_size", &tmp))
-               pdata->block_size = tmp;
-
-       if (!of_property_read_u32_array(np, "data-width", arr, nr_masters)) {
-               for (tmp = 0; tmp < nr_masters; tmp++)
-                       pdata->data_width[tmp] = arr[tmp];
-       } else if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
+       /* Try deprecated property first */
+       if (!of_property_read_u32_array(np, "data_width", arr, nr_masters)) {
                for (tmp = 0; tmp < nr_masters; tmp++)
                        pdata->data_width[tmp] = BIT(arr[tmp] & 0x07);
        }
 
-       if (!of_property_read_u32_array(np, "multi-block", mb, nr_channels)) {
-               for (tmp = 0; tmp < nr_channels; tmp++)
-                       pdata->multi_block[tmp] = mb[tmp];
-       } else {
-               for (tmp = 0; tmp < nr_channels; tmp++)
-                       pdata->multi_block[tmp] = 1;
-       }
+       /* If both "data_width" and "data-width" are provided, the latter takes precedence */
+       of_property_read_u32_array(np, "data-width", pdata->data_width, nr_masters);
 
-       if (of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst,
-                                      nr_channels)) {
-               memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
-       }
+       memset32(pdata->multi_block, 1, nr_channels);
+       of_property_read_u32_array(np, "multi-block", pdata->multi_block, nr_channels);
 
-       if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) {
-               if (tmp > CHAN_PROTCTL_MASK)
-                       return NULL;
-               pdata->protctl = tmp;
-       }
+       memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
+       of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst, nr_channels);
+
+       of_property_read_u32(np, "snps,dma-protection-control", &pdata->protctl);
+       if (pdata->protctl > CHAN_PROTCTL_MASK)
+               return NULL;
 
        return pdata;
 }
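
The rewrite leans on the fact that the of_property_read_u32*() helpers leave their output untouched and return an error when a property is absent, so a default can be seeded first and the DT lookup issued unconditionally. A tiny sketch with hypothetical names (DEFAULT_VAL and "some-prop" are placeholders):

	u32 val = DEFAULT_VAL;				/* seed the default */
	of_property_read_u32(np, "some-prop", &val);	/* val changes only if the property exists */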
index 1142aa6..26a3f92 100644 (file)
@@ -120,9 +120,9 @@ static const struct pci_device_id dw_pci_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x22c0), (kernel_ulong_t)&dw_dma_chip_pdata },
 
        /* Elkhart Lake iDMA 32-bit (PSE DMA) */
-       { PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&idma32_chip_pdata },
-       { PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&idma32_chip_pdata },
-       { PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&idma32_chip_pdata },
+       { PCI_VDEVICE(INTEL, 0x4bb4), (kernel_ulong_t)&xbar_chip_pdata },
+       { PCI_VDEVICE(INTEL, 0x4bb5), (kernel_ulong_t)&xbar_chip_pdata },
+       { PCI_VDEVICE(INTEL, 0x4bb6), (kernel_ulong_t)&xbar_chip_pdata },
 
        /* Haswell */
        { PCI_VDEVICE(INTEL, 0x9c60), (kernel_ulong_t)&dw_dma_chip_pdata },
index 0585d74..2461189 100644 (file)
@@ -149,9 +149,9 @@ static const struct acpi_device_id dw_dma_acpi_id_table[] = {
        { "808622C0", (kernel_ulong_t)&dw_dma_chip_pdata },
 
        /* Elkhart Lake iDMA 32-bit (PSE DMA) */
-       { "80864BB4", (kernel_ulong_t)&idma32_chip_pdata },
-       { "80864BB5", (kernel_ulong_t)&idma32_chip_pdata },
-       { "80864BB6", (kernel_ulong_t)&idma32_chip_pdata },
+       { "80864BB4", (kernel_ulong_t)&xbar_chip_pdata },
+       { "80864BB5", (kernel_ulong_t)&xbar_chip_pdata },
+       { "80864BB6", (kernel_ulong_t)&xbar_chip_pdata },
 
        { }
 };
index 0102777..98f9ee7 100644 (file)
@@ -897,7 +897,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
        if (data && data->name)
                name = data->name;
 
-       ret = clk_enable(edmac->clk);
+       ret = clk_prepare_enable(edmac->clk);
        if (ret)
                return ret;
 
@@ -936,7 +936,7 @@ static int ep93xx_dma_alloc_chan_resources(struct dma_chan *chan)
 fail_free_irq:
        free_irq(edmac->irq, edmac);
 fail_clk_disable:
-       clk_disable(edmac->clk);
+       clk_disable_unprepare(edmac->clk);
 
        return ret;
 }
@@ -969,7 +969,7 @@ static void ep93xx_dma_free_chan_resources(struct dma_chan *chan)
        list_for_each_entry_safe(desc, d, &list, node)
                kfree(desc);
 
-       clk_disable(edmac->clk);
+       clk_disable_unprepare(edmac->clk);
        free_irq(edmac->irq, edmac);
 }
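
clk_enable() may only be called on a clock that has already gone through clk_prepare(), and clk_prepare() may sleep; clk_prepare_enable()/clk_disable_unprepare() bundle the two steps, which is presumably why the sleepable channel alloc/free paths above now use them.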
 
index 4ae0579..8dd40d0 100644 (file)
@@ -291,9 +291,8 @@ static void dpaa2_qdma_issue_pending(struct dma_chan *chan)
 
                err = dpaa2_io_service_enqueue_fq(NULL, dpaa2_chan->fqid, fd);
                if (err) {
-                       list_del(&dpaa2_comp->list);
-                       list_add_tail(&dpaa2_comp->list,
-                                     &dpaa2_chan->comp_free);
+                       list_move_tail(&dpaa2_comp->list,
+                                      &dpaa2_chan->comp_free);
                }
        }
 err_enqueue:
@@ -626,8 +625,7 @@ static void dpaa2_qdma_free_desc(struct virt_dma_desc *vdesc)
        dpaa2_comp = to_fsl_qdma_comp(vdesc);
        qchan = dpaa2_comp->qchan;
        spin_lock_irqsave(&qchan->queue_lock, flags);
-       list_del(&dpaa2_comp->list);
-       list_add_tail(&dpaa2_comp->list, &qchan->comp_free);
+       list_move_tail(&dpaa2_comp->list, &qchan->comp_free);
        spin_unlock_irqrestore(&qchan->queue_lock, flags);
 }
 
@@ -703,7 +701,7 @@ static int dpaa2_qdma_probe(struct fsl_mc_device *dpdmai_dev)
        /* DPDMAI enable */
        err = dpdmai_enable(priv->mc_io, 0, dpdmai_dev->mc_handle);
        if (err) {
-               dev_err(dev, "dpdmai_enable() faile\n");
+               dev_err(dev, "dpdmai_enable() failed\n");
                goto err_enable;
        }
 
index a259ee0..c855a0e 100644 (file)
@@ -133,11 +133,6 @@ static inline void hisi_dma_update_bit(void __iomem *addr, u32 pos, bool val)
        writel_relaxed(tmp, addr);
 }
 
-static void hisi_dma_free_irq_vectors(void *data)
-{
-       pci_free_irq_vectors(data);
-}
-
 static void hisi_dma_pause_dma(struct hisi_dma_dev *hdma_dev, u32 index,
                               bool pause)
 {
@@ -544,6 +539,7 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        pci_set_drvdata(pdev, hdma_dev);
        pci_set_master(pdev);
 
+       /* This will be freed by 'pcim_release()'. See 'pcim_enable_device()' */
        ret = pci_alloc_irq_vectors(pdev, HISI_DMA_MSI_NUM, HISI_DMA_MSI_NUM,
                                    PCI_IRQ_MSI);
        if (ret < 0) {
@@ -551,10 +547,6 @@ static int hisi_dma_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                return ret;
        }
 
-       ret = devm_add_action_or_reset(dev, hisi_dma_free_irq_vectors, pdev);
-       if (ret)
-               return ret;
-
        dma_dev = &hdma_dev->dma_dev;
        dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
        dma_dev->device_free_chan_resources = hisi_dma_free_chan_resources;
index 6d11558..a1e9f2b 100644 (file)
@@ -1,4 +1,12 @@
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=IDXD
+
 obj-$(CONFIG_INTEL_IDXD) += idxd.o
 idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
 
 idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
+
+obj-$(CONFIG_INTEL_IDXD_BUS) += idxd_bus.o
+idxd_bus-y := bus.o
+
+obj-$(CONFIG_INTEL_IDXD_COMPAT) += idxd_compat.o
+idxd_compat-y := compat.o
diff --git a/drivers/dma/idxd/bus.c b/drivers/dma/idxd/bus.c
new file mode 100644 (file)
index 0000000..6f84621
--- /dev/null
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include "idxd.h"
+
+
+int __idxd_driver_register(struct idxd_device_driver *idxd_drv, struct module *owner,
+                          const char *mod_name)
+{
+       struct device_driver *drv = &idxd_drv->drv;
+
+       if (!idxd_drv->type) {
+               pr_debug("driver type not set (%ps)\n", __builtin_return_address(0));
+               return -EINVAL;
+       }
+
+       drv->name = idxd_drv->name;
+       drv->bus = &dsa_bus_type;
+       drv->owner = owner;
+       drv->mod_name = mod_name;
+
+       return driver_register(drv);
+}
+EXPORT_SYMBOL_GPL(__idxd_driver_register);
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv)
+{
+       driver_unregister(&idxd_drv->drv);
+}
+EXPORT_SYMBOL_GPL(idxd_driver_unregister);
+
+static int idxd_config_bus_match(struct device *dev,
+                                struct device_driver *drv)
+{
+       struct idxd_device_driver *idxd_drv =
+               container_of(drv, struct idxd_device_driver, drv);
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+       int i = 0;
+
+       while (idxd_drv->type[i] != IDXD_DEV_NONE) {
+               if (idxd_dev->type == idxd_drv->type[i])
+                       return 1;
+               i++;
+       }
+
+       return 0;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+       struct idxd_device_driver *idxd_drv =
+               container_of(dev->driver, struct idxd_device_driver, drv);
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return idxd_drv->probe(idxd_dev);
+}
+
+static void idxd_config_bus_remove(struct device *dev)
+{
+       struct idxd_device_driver *idxd_drv =
+               container_of(dev->driver, struct idxd_device_driver, drv);
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       idxd_drv->remove(idxd_dev);
+}
+
+struct bus_type dsa_bus_type = {
+       .name = "dsa",
+       .match = idxd_config_bus_match,
+       .probe = idxd_config_bus_probe,
+       .remove = idxd_config_bus_remove,
+};
+EXPORT_SYMBOL_GPL(dsa_bus_type);
+
+static int __init dsa_bus_init(void)
+{
+       return bus_register(&dsa_bus_type);
+}
+module_init(dsa_bus_init);
+
+static void __exit dsa_bus_exit(void)
+{
+       bus_unregister(&dsa_bus_type);
+}
+module_exit(dsa_bus_exit);
+
+MODULE_DESCRIPTION("IDXD driver dsa_bus_type driver");
+MODULE_LICENSE("GPL v2");
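
A minimal client of the new bus might look like the sketch below. It assumes module_idxd_driver() (used by compat.c further down) expands to __idxd_driver_register(&drv, THIS_MODULE, KBUILD_MODNAME) on load and idxd_driver_unregister() on unload, as idxd.h presumably defines; the driver name and callbacks are purely illustrative:

	static enum idxd_dev_type example_types[] = { IDXD_DEV_WQ, IDXD_DEV_NONE };

	static int example_probe(struct idxd_dev *idxd_dev)
	{
		return 0;	/* claim any WQ device on the dsa bus */
	}

	static void example_remove(struct idxd_dev *idxd_dev)
	{
	}

	static struct idxd_device_driver example_idxd_drv = {
		.name   = "example",		/* hypothetical */
		.type   = example_types,
		.probe  = example_probe,
		.remove = example_remove,
	};
	module_idxd_driver(example_idxd_drv);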
index e9def57..b9b2b4a 100644 (file)
@@ -41,7 +41,7 @@ struct idxd_user_context {
 
 static void idxd_cdev_dev_release(struct device *dev)
 {
-       struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
+       struct idxd_cdev *idxd_cdev = dev_to_cdev(dev);
        struct idxd_cdev_context *cdev_ctx;
        struct idxd_wq *wq = idxd_cdev->wq;
 
@@ -218,14 +218,13 @@ static __poll_t idxd_cdev_poll(struct file *filp,
        struct idxd_user_context *ctx = filp->private_data;
        struct idxd_wq *wq = ctx->wq;
        struct idxd_device *idxd = wq->idxd;
-       unsigned long flags;
        __poll_t out = 0;
 
        poll_wait(filp, &wq->err_queue, wait);
-       spin_lock_irqsave(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
        if (idxd->sw_err.valid)
                out = EPOLLIN | EPOLLRDNORM;
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_unlock(&idxd->dev_lock);
 
        return out;
 }
@@ -256,9 +255,10 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
        if (!idxd_cdev)
                return -ENOMEM;
 
+       idxd_cdev->idxd_dev.type = IDXD_DEV_CDEV;
        idxd_cdev->wq = wq;
        cdev = &idxd_cdev->cdev;
-       dev = &idxd_cdev->dev;
+       dev = cdev_dev(idxd_cdev);
        cdev_ctx = &ictx[wq->idxd->data->type];
        minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
        if (minor < 0) {
@@ -268,7 +268,7 @@ int idxd_wq_add_cdev(struct idxd_wq *wq)
        idxd_cdev->minor = minor;
 
        device_initialize(dev);
-       dev->parent = &wq->conf_dev;
+       dev->parent = wq_confdev(wq);
        dev->bus = &dsa_bus_type;
        dev->type = &idxd_cdev_device_type;
        dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
@@ -299,10 +299,67 @@ void idxd_wq_del_cdev(struct idxd_wq *wq)
 
        idxd_cdev = wq->idxd_cdev;
        wq->idxd_cdev = NULL;
-       cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
-       put_device(&idxd_cdev->dev);
+       cdev_device_del(&idxd_cdev->cdev, cdev_dev(idxd_cdev));
+       put_device(cdev_dev(idxd_cdev));
 }
 
+static int idxd_user_drv_probe(struct idxd_dev *idxd_dev)
+{
+       struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+       struct idxd_device *idxd = wq->idxd;
+       int rc;
+
+       if (idxd->state != IDXD_DEV_ENABLED)
+               return -ENXIO;
+
+       mutex_lock(&wq->wq_lock);
+       wq->type = IDXD_WQT_USER;
+       rc = __drv_enable_wq(wq);
+       if (rc < 0)
+               goto err;
+
+       rc = idxd_wq_add_cdev(wq);
+       if (rc < 0) {
+               idxd->cmd_status = IDXD_SCMD_CDEV_ERR;
+               goto err_cdev;
+       }
+
+       idxd->cmd_status = 0;
+       mutex_unlock(&wq->wq_lock);
+       return 0;
+
+err_cdev:
+       __drv_disable_wq(wq);
+err:
+       wq->type = IDXD_WQT_NONE;
+       mutex_unlock(&wq->wq_lock);
+       return rc;
+}
+
+static void idxd_user_drv_remove(struct idxd_dev *idxd_dev)
+{
+       struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+       mutex_lock(&wq->wq_lock);
+       idxd_wq_del_cdev(wq);
+       __drv_disable_wq(wq);
+       wq->type = IDXD_WQT_NONE;
+       mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+       IDXD_DEV_WQ,
+       IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_user_drv = {
+       .probe = idxd_user_drv_probe,
+       .remove = idxd_user_drv_remove,
+       .name = "user",
+       .type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_user_drv);
+
 int idxd_cdev_register(void)
 {
        int rc, i;
diff --git a/drivers/dma/idxd/compat.c b/drivers/dma/idxd/compat.c
new file mode 100644 (file)
index 0000000..3df2161
--- /dev/null
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2021 Intel Corporation. All rights rsvd. */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/device/bus.h>
+#include "idxd.h"
+
+extern int device_driver_attach(struct device_driver *drv, struct device *dev);
+extern void device_driver_detach(struct device *dev);
+
+#define DRIVER_ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)        \
+       struct driver_attribute driver_attr_##_name =           \
+       __ATTR_IGNORE_LOCKDEP(_name, _mode, _show, _store)
+
+static ssize_t unbind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+       struct bus_type *bus = drv->bus;
+       struct device *dev;
+       int rc = -ENODEV;
+
+       dev = bus_find_device_by_name(bus, NULL, buf);
+       if (dev && dev->driver) {
+               device_driver_detach(dev);
+               rc = count;
+       }
+
+       return rc;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(unbind, 0200, NULL, unbind_store);
+
+static ssize_t bind_store(struct device_driver *drv, const char *buf, size_t count)
+{
+       struct bus_type *bus = drv->bus;
+       struct device *dev;
+       struct device_driver *alt_drv = NULL;
+       int rc = -ENODEV;
+       struct idxd_dev *idxd_dev;
+
+       dev = bus_find_device_by_name(bus, NULL, buf);
+       if (!dev || dev->driver || drv != &dsa_drv.drv)
+               return -ENODEV;
+
+       idxd_dev = confdev_to_idxd_dev(dev);
+       if (is_idxd_dev(idxd_dev)) {
+               alt_drv = driver_find("idxd", bus);
+       } else if (is_idxd_wq_dev(idxd_dev)) {
+               struct idxd_wq *wq = confdev_to_wq(dev);
+
+               if (is_idxd_wq_kernel(wq))
+                       alt_drv = driver_find("dmaengine", bus);
+               else if (is_idxd_wq_user(wq))
+                       alt_drv = driver_find("user", bus);
+       }
+       if (!alt_drv)
+               return -ENODEV;
+
+       rc = device_driver_attach(alt_drv, dev);
+       if (rc < 0)
+               return rc;
+
+       return count;
+}
+static DRIVER_ATTR_IGNORE_LOCKDEP(bind, 0200, NULL, bind_store);
+
+static struct attribute *dsa_drv_compat_attrs[] = {
+       &driver_attr_bind.attr,
+       &driver_attr_unbind.attr,
+       NULL,
+};
+
+static const struct attribute_group dsa_drv_compat_attr_group = {
+       .attrs = dsa_drv_compat_attrs,
+};
+
+static const struct attribute_group *dsa_drv_compat_groups[] = {
+       &dsa_drv_compat_attr_group,
+       NULL,
+};
+
+static int idxd_dsa_drv_probe(struct idxd_dev *idxd_dev)
+{
+       return -ENODEV;
+}
+
+static void idxd_dsa_drv_remove(struct idxd_dev *idxd_dev)
+{
+}
+
+static enum idxd_dev_type dev_types[] = {
+       IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver dsa_drv = {
+       .name = "dsa",
+       .probe = idxd_dsa_drv_probe,
+       .remove = idxd_dsa_drv_remove,
+       .type = dev_types,
+       .drv = {
+               .suppress_bind_attrs = true,
+               .groups = dsa_drv_compat_groups,
+       },
+};
+
+module_idxd_driver(dsa_drv);
+MODULE_IMPORT_NS(IDXD);
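
In practice this keeps the pre-existing user flow working: writing a device name to the compat "dsa" driver's bind attribute (presumably exposed as /sys/bus/dsa/drivers/dsa/bind) makes bind_store() look the device up on the bus and attach the matching real driver ("idxd" for whole devices, "dmaengine" for kernel WQs, "user" for user WQs), while unbind detaches whatever is currently bound.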
index 420b93f..83a5ff2 100644 (file)
@@ -15,6 +15,8 @@
 
 static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
                          u32 *status);
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd);
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq);
 
 /* Interrupt control bits */
 void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
@@ -139,8 +141,8 @@ int idxd_wq_alloc_resources(struct idxd_wq *wq)
        if (wq->type != IDXD_WQT_KERNEL)
                return 0;
 
-       wq->num_descs = wq->size;
-       num_descs = wq->size;
+       num_descs = wq_dedicated(wq) ? wq->size : wq->threshold;
+       wq->num_descs = num_descs;
 
        rc = alloc_hw_descs(wq, num_descs);
        if (rc < 0)
@@ -234,7 +236,7 @@ int idxd_wq_enable(struct idxd_wq *wq)
        return 0;
 }
 
-int idxd_wq_disable(struct idxd_wq *wq)
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config)
 {
        struct idxd_device *idxd = wq->idxd;
        struct device *dev = &idxd->pdev->dev;
@@ -255,6 +257,8 @@ int idxd_wq_disable(struct idxd_wq *wq)
                return -ENXIO;
        }
 
+       if (reset_config)
+               idxd_wq_disable_cleanup(wq);
        wq->state = IDXD_WQ_DISABLED;
        dev_dbg(dev, "WQ %d disabled\n", wq->id);
        return 0;
@@ -289,6 +293,7 @@ void idxd_wq_reset(struct idxd_wq *wq)
 
        operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
        idxd_cmd_exec(idxd, IDXD_CMD_RESET_WQ, operand, NULL);
+       idxd_wq_disable_cleanup(wq);
        wq->state = IDXD_WQ_DISABLED;
 }
 
@@ -315,6 +320,7 @@ void idxd_wq_unmap_portal(struct idxd_wq *wq)
 
        devm_iounmap(dev, wq->portal);
        wq->portal = NULL;
+       wq->portal_offset = 0;
 }
 
 void idxd_wqs_unmap_portal(struct idxd_device *idxd)
@@ -335,19 +341,18 @@ int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
        int rc;
        union wqcfg wqcfg;
        unsigned int offset;
-       unsigned long flags;
 
-       rc = idxd_wq_disable(wq);
+       rc = idxd_wq_disable(wq, false);
        if (rc < 0)
                return rc;
 
        offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
-       spin_lock_irqsave(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
        wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
        wqcfg.pasid_en = 1;
        wqcfg.pasid = pasid;
        iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_unlock(&idxd->dev_lock);
 
        rc = idxd_wq_enable(wq);
        if (rc < 0)
@@ -362,19 +367,18 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
        int rc;
        union wqcfg wqcfg;
        unsigned int offset;
-       unsigned long flags;
 
-       rc = idxd_wq_disable(wq);
+       rc = idxd_wq_disable(wq, false);
        if (rc < 0)
                return rc;
 
        offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_PASID_IDX);
-       spin_lock_irqsave(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
        wqcfg.bits[WQCFG_PASID_IDX] = ioread32(idxd->reg_base + offset);
        wqcfg.pasid_en = 0;
        wqcfg.pasid = 0;
        iowrite32(wqcfg.bits[WQCFG_PASID_IDX], idxd->reg_base + offset);
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_unlock(&idxd->dev_lock);
 
        rc = idxd_wq_enable(wq);
        if (rc < 0)
@@ -383,11 +387,11 @@ int idxd_wq_disable_pasid(struct idxd_wq *wq)
        return 0;
 }
 
-void idxd_wq_disable_cleanup(struct idxd_wq *wq)
+static void idxd_wq_disable_cleanup(struct idxd_wq *wq)
 {
        struct idxd_device *idxd = wq->idxd;
 
-       lockdep_assert_held(&idxd->dev_lock);
+       lockdep_assert_held(&wq->wq_lock);
        memset(wq->wqcfg, 0, idxd->wqcfg_size);
        wq->type = IDXD_WQT_NONE;
        wq->size = 0;
@@ -396,6 +400,7 @@ void idxd_wq_disable_cleanup(struct idxd_wq *wq)
        wq->priority = 0;
        wq->ats_dis = 0;
        clear_bit(WQ_FLAG_DEDICATED, &wq->flags);
+       clear_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags);
        memset(wq->name, 0, WQ_NAME_SIZE);
 }
 
@@ -455,7 +460,6 @@ int idxd_device_init_reset(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
        union idxd_command_reg cmd;
-       unsigned long flags;
 
        if (idxd_device_is_halted(idxd)) {
                dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
@@ -465,13 +469,13 @@ int idxd_device_init_reset(struct idxd_device *idxd)
        memset(&cmd, 0, sizeof(cmd));
        cmd.cmd = IDXD_CMD_RESET_DEVICE;
        dev_dbg(dev, "%s: sending reset for init.\n", __func__);
-       spin_lock_irqsave(&idxd->cmd_lock, flags);
+       spin_lock(&idxd->cmd_lock);
        iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
        while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
               IDXD_CMDSTS_ACTIVE)
                cpu_relax();
-       spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+       spin_unlock(&idxd->cmd_lock);
        return 0;
 }
 
@@ -480,7 +484,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
 {
        union idxd_command_reg cmd;
        DECLARE_COMPLETION_ONSTACK(done);
-       unsigned long flags;
+       u32 stat;
 
        if (idxd_device_is_halted(idxd)) {
                dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
@@ -494,7 +498,7 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
        cmd.operand = operand;
        cmd.int_req = 1;
 
-       spin_lock_irqsave(&idxd->cmd_lock, flags);
+       spin_lock(&idxd->cmd_lock);
        wait_event_lock_irq(idxd->cmd_waitq,
                            !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
                            idxd->cmd_lock);
@@ -511,18 +515,18 @@ static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
         * After command submitted, release lock and go to sleep until
         * the command completes via interrupt.
         */
-       spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+       spin_unlock(&idxd->cmd_lock);
        wait_for_completion(&done);
-       spin_lock_irqsave(&idxd->cmd_lock, flags);
-       if (status) {
-               *status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-               idxd->cmd_status = *status & GENMASK(7, 0);
-       }
+       stat = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+       spin_lock(&idxd->cmd_lock);
+       if (status)
+               *status = stat;
+       idxd->cmd_status = stat & GENMASK(7, 0);
 
        __clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
        /* Wake up other pending commands */
        wake_up(&idxd->cmd_waitq);
-       spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+       spin_unlock(&idxd->cmd_lock);
 }
 
 int idxd_device_enable(struct idxd_device *idxd)
@@ -548,27 +552,10 @@ int idxd_device_enable(struct idxd_device *idxd)
        return 0;
 }
 
-void idxd_device_wqs_clear_state(struct idxd_device *idxd)
-{
-       int i;
-
-       lockdep_assert_held(&idxd->dev_lock);
-
-       for (i = 0; i < idxd->max_wqs; i++) {
-               struct idxd_wq *wq = idxd->wqs[i];
-
-               if (wq->state == IDXD_WQ_ENABLED) {
-                       idxd_wq_disable_cleanup(wq);
-                       wq->state = IDXD_WQ_DISABLED;
-               }
-       }
-}
-
 int idxd_device_disable(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
        u32 status;
-       unsigned long flags;
 
        if (!idxd_is_enabled(idxd)) {
                dev_dbg(dev, "Device is not enabled\n");
@@ -584,22 +571,20 @@ int idxd_device_disable(struct idxd_device *idxd)
                return -ENXIO;
        }
 
-       spin_lock_irqsave(&idxd->dev_lock, flags);
-       idxd_device_wqs_clear_state(idxd);
-       idxd->state = IDXD_DEV_CONF_READY;
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
+       idxd_device_clear_state(idxd);
+       idxd->state = IDXD_DEV_DISABLED;
+       spin_unlock(&idxd->dev_lock);
        return 0;
 }
 
 void idxd_device_reset(struct idxd_device *idxd)
 {
-       unsigned long flags;
-
        idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL);
-       spin_lock_irqsave(&idxd->dev_lock, flags);
-       idxd_device_wqs_clear_state(idxd);
-       idxd->state = IDXD_DEV_CONF_READY;
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
+       idxd_device_clear_state(idxd);
+       idxd->state = IDXD_DEV_DISABLED;
+       spin_unlock(&idxd->dev_lock);
 }
 
 void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid)
@@ -649,7 +634,6 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
        struct device *dev = &idxd->pdev->dev;
        u32 operand, status;
        union idxd_command_reg cmd;
-       unsigned long flags;
 
        if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
                return -EOPNOTSUPP;
@@ -667,13 +651,13 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
 
        dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
 
-       spin_lock_irqsave(&idxd->cmd_lock, flags);
+       spin_lock(&idxd->cmd_lock);
        iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
        while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
                cpu_relax();
        status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-       spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+       spin_unlock(&idxd->cmd_lock);
 
        if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
                dev_dbg(dev, "release int handle failed: %#x\n", status);
@@ -685,6 +669,59 @@ int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
 }
 
 /* Device configuration bits */
+static void idxd_engines_clear_state(struct idxd_device *idxd)
+{
+       struct idxd_engine *engine;
+       int i;
+
+       lockdep_assert_held(&idxd->dev_lock);
+       for (i = 0; i < idxd->max_engines; i++) {
+               engine = idxd->engines[i];
+               engine->group = NULL;
+       }
+}
+
+static void idxd_groups_clear_state(struct idxd_device *idxd)
+{
+       struct idxd_group *group;
+       int i;
+
+       lockdep_assert_held(&idxd->dev_lock);
+       for (i = 0; i < idxd->max_groups; i++) {
+               group = idxd->groups[i];
+               memset(&group->grpcfg, 0, sizeof(group->grpcfg));
+               group->num_engines = 0;
+               group->num_wqs = 0;
+               group->use_token_limit = false;
+               group->tokens_allowed = 0;
+               group->tokens_reserved = 0;
+               group->tc_a = -1;
+               group->tc_b = -1;
+       }
+}
+
+static void idxd_device_wqs_clear_state(struct idxd_device *idxd)
+{
+       int i;
+
+       lockdep_assert_held(&idxd->dev_lock);
+       for (i = 0; i < idxd->max_wqs; i++) {
+               struct idxd_wq *wq = idxd->wqs[i];
+
+               if (wq->state == IDXD_WQ_ENABLED) {
+                       idxd_wq_disable_cleanup(wq);
+                       wq->state = IDXD_WQ_DISABLED;
+               }
+       }
+}
+
+void idxd_device_clear_state(struct idxd_device *idxd)
+{
+       idxd_groups_clear_state(idxd);
+       idxd_engines_clear_state(idxd);
+       idxd_device_wqs_clear_state(idxd);
+}
+
 void idxd_msix_perm_setup(struct idxd_device *idxd)
 {
        union msix_perm mperm;
@@ -773,6 +810,15 @@ static int idxd_groups_config_write(struct idxd_device *idxd)
        return 0;
 }
 
+static bool idxd_device_pasid_priv_enabled(struct idxd_device *idxd)
+{
+       struct pci_dev *pdev = idxd->pdev;
+
+       if (pdev->pasid_enabled && (pdev->pasid_features & PCI_PASID_CAP_PRIV))
+               return true;
+       return false;
+}
+
 static int idxd_wq_config_write(struct idxd_wq *wq)
 {
        struct idxd_device *idxd = wq->idxd;
@@ -796,6 +842,7 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
        wq->wqcfg->wq_size = wq->size;
 
        if (wq->size == 0) {
+               idxd->cmd_status = IDXD_SCMD_WQ_NO_SIZE;
                dev_warn(dev, "Incorrect work queue size: 0\n");
                return -EINVAL;
        }
@@ -804,7 +851,6 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
        wq->wqcfg->wq_thresh = wq->threshold;
 
        /* byte 8-11 */
-       wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
        if (wq_dedicated(wq))
                wq->wqcfg->mode = 1;
 
@@ -814,6 +860,25 @@ static int idxd_wq_config_write(struct idxd_wq *wq)
                        wq->wqcfg->pasid = idxd->pasid;
        }
 
+       /*
+        * Here the priv bit is set depending on the WQ type. priv = 1 if the
+        * WQ type is kernel to indicate privileged access. This setting only
+        * matters for dedicated WQ. According to the DSA spec:
+        * If the WQ is in dedicated mode, WQ PASID Enable is 1, and the
+        * Privileged Mode Enable field of the PCI Express PASID capability
+        * is 0, this field must be 0.
+        *
+        * For a dedicated kernel WQ that cannot support the PASID capability,
+        * the configuration will be rejected.
+        */
+       wq->wqcfg->priv = !!(wq->type == IDXD_WQT_KERNEL);
+       if (wq_dedicated(wq) && wq->wqcfg->pasid_en &&
+           !idxd_device_pasid_priv_enabled(idxd) &&
+           wq->type == IDXD_WQT_KERNEL) {
+               idxd->cmd_status = IDXD_SCMD_WQ_NO_PRIV;
+               return -EOPNOTSUPP;
+       }
+
        wq->wqcfg->priority = wq->priority;
 
        if (idxd->hw.gen_cap.block_on_fault &&
@@ -931,6 +996,7 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
                        continue;
 
                if (wq_shared(wq) && !device_swq_supported(idxd)) {
+                       idxd->cmd_status = IDXD_SCMD_WQ_NO_SWQ_SUPPORT;
                        dev_warn(dev, "No shared wq support but configured.\n");
                        return -EINVAL;
                }
@@ -939,8 +1005,10 @@ static int idxd_wqs_setup(struct idxd_device *idxd)
                configured++;
        }
 
-       if (configured == 0)
+       if (configured == 0) {
+               idxd->cmd_status = IDXD_SCMD_WQ_NONE_CONFIGURED;
                return -EINVAL;
+       }
 
        return 0;
 }
@@ -1086,3 +1154,203 @@ int idxd_device_load_config(struct idxd_device *idxd)
 
        return 0;
 }
+
+int __drv_enable_wq(struct idxd_wq *wq)
+{
+       struct idxd_device *idxd = wq->idxd;
+       struct device *dev = &idxd->pdev->dev;
+       int rc = -ENXIO;
+
+       lockdep_assert_held(&wq->wq_lock);
+
+       if (idxd->state != IDXD_DEV_ENABLED) {
+               idxd->cmd_status = IDXD_SCMD_DEV_NOT_ENABLED;
+               goto err;
+       }
+
+       if (wq->state != IDXD_WQ_DISABLED) {
+               dev_dbg(dev, "wq %d already enabled.\n", wq->id);
+               idxd->cmd_status = IDXD_SCMD_WQ_ENABLED;
+               rc = -EBUSY;
+               goto err;
+       }
+
+       if (!wq->group) {
+               dev_dbg(dev, "wq %d not attached to group.\n", wq->id);
+               idxd->cmd_status = IDXD_SCMD_WQ_NO_GRP;
+               goto err;
+       }
+
+       if (strlen(wq->name) == 0) {
+               idxd->cmd_status = IDXD_SCMD_WQ_NO_NAME;
+               dev_dbg(dev, "wq %d name not set.\n", wq->id);
+               goto err;
+       }
+
+       /* Shared WQ checks */
+       if (wq_shared(wq)) {
+               if (!device_swq_supported(idxd)) {
+                       idxd->cmd_status = IDXD_SCMD_WQ_NO_SVM;
+                       dev_dbg(dev, "PASID not enabled and shared wq.\n");
+                       goto err;
+               }
+               /*
+                * Shared wq with the threshold set to 0 means the user
+                * did not set the threshold or transitioned from a
+                * dedicated wq but did not set threshold. A value
+                * of 0 would effectively disable the shared wq. The
+                * driver does not allow a value of 0 to be set for
+                * threshold via sysfs.
+                */
+               if (wq->threshold == 0) {
+                       idxd->cmd_status = IDXD_SCMD_WQ_NO_THRESH;
+                       dev_dbg(dev, "Shared wq and threshold 0.\n");
+                       goto err;
+               }
+       }
+
+       rc = 0;
+       spin_lock(&idxd->dev_lock);
+       if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               rc = idxd_device_config(idxd);
+       spin_unlock(&idxd->dev_lock);
+       if (rc < 0) {
+               dev_dbg(dev, "Writing wq %d config failed: %d\n", wq->id, rc);
+               goto err;
+       }
+
+       rc = idxd_wq_enable(wq);
+       if (rc < 0) {
+               dev_dbg(dev, "wq %d enabling failed: %d\n", wq->id, rc);
+               goto err;
+       }
+
+       rc = idxd_wq_map_portal(wq);
+       if (rc < 0) {
+               idxd->cmd_status = IDXD_SCMD_WQ_PORTAL_ERR;
+               dev_dbg(dev, "wq %d portal mapping failed: %d\n", wq->id, rc);
+               goto err_map_portal;
+       }
+
+       wq->client_count = 0;
+       return 0;
+
+err_map_portal:
+       rc = idxd_wq_disable(wq, false);
+       if (rc < 0)
+               dev_dbg(dev, "wq %s disable failed\n", dev_name(wq_confdev(wq)));
+err:
+       return rc;
+}
+
+int drv_enable_wq(struct idxd_wq *wq)
+{
+       int rc;
+
+       mutex_lock(&wq->wq_lock);
+       rc = __drv_enable_wq(wq);
+       mutex_unlock(&wq->wq_lock);
+       return rc;
+}
+
+void __drv_disable_wq(struct idxd_wq *wq)
+{
+       struct idxd_device *idxd = wq->idxd;
+       struct device *dev = &idxd->pdev->dev;
+
+       lockdep_assert_held(&wq->wq_lock);
+
+       if (idxd_wq_refcount(wq))
+               dev_warn(dev, "Clients have claims on wq %d: %d\n",
+                        wq->id, idxd_wq_refcount(wq));
+
+       idxd_wq_unmap_portal(wq);
+
+       idxd_wq_drain(wq);
+       idxd_wq_reset(wq);
+
+       wq->client_count = 0;
+}
+
+void drv_disable_wq(struct idxd_wq *wq)
+{
+       mutex_lock(&wq->wq_lock);
+       __drv_disable_wq(wq);
+       mutex_unlock(&wq->wq_lock);
+}
+
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev)
+{
+       struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+       int rc = 0;
+
+       /*
+        * The device should be in the disabled state for idxd_drv to load. If it is
+        * in the enabled state, the device was altered outside of the driver's control.
+        * If the device is in the halted state, we do not want to proceed.
+        */
+       if (idxd->state != IDXD_DEV_DISABLED) {
+               idxd->cmd_status = IDXD_SCMD_DEV_ENABLED;
+               return -ENXIO;
+       }
+
+       /* Device configuration */
+       spin_lock(&idxd->dev_lock);
+       if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               rc = idxd_device_config(idxd);
+       spin_unlock(&idxd->dev_lock);
+       if (rc < 0)
+               return -ENXIO;
+
+       /* Start device */
+       rc = idxd_device_enable(idxd);
+       if (rc < 0)
+               return rc;
+
+       /* Setup DMA device without channels */
+       rc = idxd_register_dma_device(idxd);
+       if (rc < 0) {
+               idxd_device_disable(idxd);
+               idxd->cmd_status = IDXD_SCMD_DEV_DMA_ERR;
+               return rc;
+       }
+
+       idxd->cmd_status = 0;
+       return 0;
+}
+
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev)
+{
+       struct device *dev = &idxd_dev->conf_dev;
+       struct idxd_device *idxd = idxd_dev_to_idxd(idxd_dev);
+       int i;
+
+       for (i = 0; i < idxd->max_wqs; i++) {
+               struct idxd_wq *wq = idxd->wqs[i];
+               struct device *wq_dev = wq_confdev(wq);
+
+               if (wq->state == IDXD_WQ_DISABLED)
+                       continue;
+               dev_warn(dev, "Active wq %d on disable %s.\n", i, dev_name(wq_dev));
+               device_release_driver(wq_dev);
+       }
+
+       idxd_unregister_dma_device(idxd);
+       idxd_device_disable(idxd);
+       if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+               idxd_device_reset(idxd);
+}
+
+static enum idxd_dev_type dev_types[] = {
+       IDXD_DEV_DSA,
+       IDXD_DEV_IAX,
+       IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_drv = {
+       .type = dev_types,
+       .probe = idxd_device_drv_probe,
+       .remove = idxd_device_drv_remove,
+       .name = "idxd",
+};
+EXPORT_SYMBOL_GPL(idxd_drv);
index 77439b6..e0f056c 100644 (file)
@@ -69,7 +69,11 @@ static inline void idxd_prep_desc_common(struct idxd_wq *wq,
        hw->src_addr = addr_f1;
        hw->dst_addr = addr_f2;
        hw->xfer_size = len;
-       hw->priv = !!(wq->type == IDXD_WQT_KERNEL);
+       /*
+        * For dedicated WQ, this field is ignored and HW will use the WQCFG.priv
+        * field instead. This field should be set to 1 for kernel descriptors.
+        */
+       hw->priv = 1;
        hw->completion_addr = compl;
 }
 
@@ -149,10 +153,8 @@ static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
        cookie = dma_cookie_assign(tx);
 
        rc = idxd_submit_desc(wq, desc);
-       if (rc < 0) {
-               idxd_free_desc(wq, desc);
+       if (rc < 0)
                return rc;
-       }
 
        return cookie;
 }
@@ -245,7 +247,7 @@ int idxd_register_dma_channel(struct idxd_wq *wq)
 
        wq->idxd_chan = idxd_chan;
        idxd_chan->wq = wq;
-       get_device(&wq->conf_dev);
+       get_device(wq_confdev(wq));
 
        return 0;
 }
@@ -260,5 +262,87 @@ void idxd_unregister_dma_channel(struct idxd_wq *wq)
        list_del(&chan->device_node);
        kfree(wq->idxd_chan);
        wq->idxd_chan = NULL;
-       put_device(&wq->conf_dev);
+       put_device(wq_confdev(wq));
 }
+
+static int idxd_dmaengine_drv_probe(struct idxd_dev *idxd_dev)
+{
+       struct device *dev = &idxd_dev->conf_dev;
+       struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+       struct idxd_device *idxd = wq->idxd;
+       int rc;
+
+       if (idxd->state != IDXD_DEV_ENABLED)
+               return -ENXIO;
+
+       mutex_lock(&wq->wq_lock);
+       wq->type = IDXD_WQT_KERNEL;
+       rc = __drv_enable_wq(wq);
+       if (rc < 0) {
+               dev_dbg(dev, "Enable wq %d failed: %d\n", wq->id, rc);
+               rc = -ENXIO;
+               goto err;
+       }
+
+       rc = idxd_wq_alloc_resources(wq);
+       if (rc < 0) {
+               idxd->cmd_status = IDXD_SCMD_WQ_RES_ALLOC_ERR;
+               dev_dbg(dev, "WQ resource alloc failed\n");
+               goto err_res_alloc;
+       }
+
+       rc = idxd_wq_init_percpu_ref(wq);
+       if (rc < 0) {
+               idxd->cmd_status = IDXD_SCMD_PERCPU_ERR;
+               dev_dbg(dev, "percpu_ref setup failed\n");
+               goto err_ref;
+       }
+
+       rc = idxd_register_dma_channel(wq);
+       if (rc < 0) {
+               idxd->cmd_status = IDXD_SCMD_DMA_CHAN_ERR;
+               dev_dbg(dev, "Failed to register dma channel\n");
+               goto err_dma;
+       }
+
+       idxd->cmd_status = 0;
+       mutex_unlock(&wq->wq_lock);
+       return 0;
+
+err_dma:
+       idxd_wq_quiesce(wq);
+err_ref:
+       idxd_wq_free_resources(wq);
+err_res_alloc:
+       __drv_disable_wq(wq);
+err:
+       wq->type = IDXD_WQT_NONE;
+       mutex_unlock(&wq->wq_lock);
+       return rc;
+}
+
+static void idxd_dmaengine_drv_remove(struct idxd_dev *idxd_dev)
+{
+       struct idxd_wq *wq = idxd_dev_to_wq(idxd_dev);
+
+       mutex_lock(&wq->wq_lock);
+       idxd_wq_quiesce(wq);
+       idxd_unregister_dma_channel(wq);
+       __drv_disable_wq(wq);
+       idxd_wq_free_resources(wq);
+       wq->type = IDXD_WQT_NONE;
+       mutex_unlock(&wq->wq_lock);
+}
+
+static enum idxd_dev_type dev_types[] = {
+       IDXD_DEV_WQ,
+       IDXD_DEV_NONE,
+};
+
+struct idxd_device_driver idxd_dmaengine_drv = {
+       .probe = idxd_dmaengine_drv_probe,
+       .remove = idxd_dmaengine_drv_remove,
+       .name = "dmaengine",
+       .type = dev_types,
+};
+EXPORT_SYMBOL_GPL(idxd_dmaengine_drv);
index fc708be..bfcb033 100644 (file)
 #include <linux/idr.h>
 #include <linux/pci.h>
 #include <linux/perf_event.h>
+#include <uapi/linux/idxd.h>
 #include "registers.h"
 
 #define IDXD_DRIVER_VERSION    "1.00"
 
 extern struct kmem_cache *idxd_desc_pool;
+extern bool tc_override;
 
-struct idxd_device;
 struct idxd_wq;
+struct idxd_dev;
+
+enum idxd_dev_type {
+       IDXD_DEV_NONE = -1,
+       IDXD_DEV_DSA = 0,
+       IDXD_DEV_IAX,
+       IDXD_DEV_WQ,
+       IDXD_DEV_GROUP,
+       IDXD_DEV_ENGINE,
+       IDXD_DEV_CDEV,
+       IDXD_DEV_MAX_TYPE,
+};
+
+struct idxd_dev {
+       struct device conf_dev;
+       enum idxd_dev_type type;
+};
 
 #define IDXD_REG_TIMEOUT       50
 #define IDXD_DRAIN_TIMEOUT     5000
@@ -34,9 +52,18 @@ enum idxd_type {
 #define IDXD_PMU_EVENT_MAX     64
 
 struct idxd_device_driver {
+       const char *name;
+       enum idxd_dev_type *type;
+       int (*probe)(struct idxd_dev *idxd_dev);
+       void (*remove)(struct idxd_dev *idxd_dev);
        struct device_driver drv;
 };
 
+extern struct idxd_device_driver dsa_drv;
+extern struct idxd_device_driver idxd_drv;
+extern struct idxd_device_driver idxd_dmaengine_drv;
+extern struct idxd_device_driver idxd_user_drv;
+
 struct idxd_irq_entry {
        struct idxd_device *idxd;
        int id;
@@ -51,7 +78,7 @@ struct idxd_irq_entry {
 };
 
 struct idxd_group {
-       struct device conf_dev;
+       struct idxd_dev idxd_dev;
        struct idxd_device *idxd;
        struct grpcfg grpcfg;
        int id;
@@ -110,7 +137,7 @@ enum idxd_wq_type {
 struct idxd_cdev {
        struct idxd_wq *wq;
        struct cdev cdev;
-       struct device dev;
+       struct idxd_dev idxd_dev;
        int minor;
 };
 
@@ -136,9 +163,10 @@ struct idxd_dma_chan {
 
 struct idxd_wq {
        void __iomem *portal;
+       u32 portal_offset;
        struct percpu_ref wq_active;
        struct completion wq_dead;
-       struct device conf_dev;
+       struct idxd_dev idxd_dev;
        struct idxd_cdev *idxd_cdev;
        struct wait_queue_head err_queue;
        struct idxd_device *idxd;
@@ -153,7 +181,6 @@ struct idxd_wq {
        enum idxd_wq_state state;
        unsigned long flags;
        union wqcfg *wqcfg;
-       u32 vec_ptr;            /* interrupt steering */
        struct dsa_hw_desc **hw_descs;
        int num_descs;
        union {
@@ -174,7 +201,7 @@ struct idxd_wq {
 };
 
 struct idxd_engine {
-       struct device conf_dev;
+       struct idxd_dev idxd_dev;
        int id;
        struct idxd_group *group;
        struct idxd_device *idxd;
@@ -194,7 +221,6 @@ struct idxd_hw {
 enum idxd_device_state {
        IDXD_DEV_HALTED = -1,
        IDXD_DEV_DISABLED = 0,
-       IDXD_DEV_CONF_READY,
        IDXD_DEV_ENABLED,
 };
 
@@ -218,7 +244,7 @@ struct idxd_driver_data {
 };
 
 struct idxd_device {
-       struct device conf_dev;
+       struct idxd_dev idxd_dev;
        struct idxd_driver_data *data;
        struct list_head list;
        struct idxd_hw hw;
@@ -226,7 +252,7 @@ struct idxd_device {
        unsigned long flags;
        int id;
        int major;
-       u8 cmd_status;
+       u32 cmd_status;
 
        struct pci_dev *pdev;
        void __iomem *reg_base;
@@ -290,7 +316,6 @@ struct idxd_desc {
        struct list_head list;
        int id;
        int cpu;
-       unsigned int vector;
        struct idxd_wq *wq;
 };
 
@@ -302,11 +327,62 @@ enum idxd_completion_status {
        IDXD_COMP_DESC_ABORT = 0xff,
 };
 
-#define confdev_to_idxd(dev) container_of(dev, struct idxd_device, conf_dev)
-#define confdev_to_wq(dev) container_of(dev, struct idxd_wq, conf_dev)
+#define idxd_confdev(idxd) &idxd->idxd_dev.conf_dev
+#define wq_confdev(wq) &wq->idxd_dev.conf_dev
+#define engine_confdev(engine) &engine->idxd_dev.conf_dev
+#define group_confdev(group) &group->idxd_dev.conf_dev
+#define cdev_dev(cdev) &cdev->idxd_dev.conf_dev
+
+#define confdev_to_idxd_dev(dev) container_of(dev, struct idxd_dev, conf_dev)
+#define idxd_dev_to_idxd(idxd_dev) container_of(idxd_dev, struct idxd_device, idxd_dev)
+#define idxd_dev_to_wq(idxd_dev) container_of(idxd_dev, struct idxd_wq, idxd_dev)
+
+static inline struct idxd_device *confdev_to_idxd(struct device *dev)
+{
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return idxd_dev_to_idxd(idxd_dev);
+}
+
+static inline struct idxd_wq *confdev_to_wq(struct device *dev)
+{
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return idxd_dev_to_wq(idxd_dev);
+}
+
+static inline struct idxd_engine *confdev_to_engine(struct device *dev)
+{
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return container_of(idxd_dev, struct idxd_engine, idxd_dev);
+}
+
+static inline struct idxd_group *confdev_to_group(struct device *dev)
+{
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return container_of(idxd_dev, struct idxd_group, idxd_dev);
+}
+
+static inline struct idxd_cdev *dev_to_cdev(struct device *dev)
+{
+       struct idxd_dev *idxd_dev = confdev_to_idxd_dev(dev);
+
+       return container_of(idxd_dev, struct idxd_cdev, idxd_dev);
+}
+
+static inline void idxd_dev_set_type(struct idxd_dev *idev, int type)
+{
+       if (type >= IDXD_DEV_MAX_TYPE) {
+               idev->type = IDXD_DEV_NONE;
+               return;
+       }
+
+       idev->type = type;
+}
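
For orientation, a minimal hedged sketch of how a sysfs callback would resolve the bus device back to its wq through the two-level container_of chain above; the attribute name is purely illustrative and not part of this patch.

/* Hypothetical attribute: dev -> idxd_dev -> idxd_wq via the helpers above. */
static ssize_t example_wq_id_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	struct idxd_wq *wq = confdev_to_wq(dev);

	return sysfs_emit(buf, "%d\n", wq->id);
}
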
 
 extern struct bus_type dsa_bus_type;
-extern struct bus_type iax_bus_type;
 
 extern bool support_enqcmd;
 extern struct ida idxd_ida;
@@ -316,24 +392,24 @@ extern struct device_type idxd_wq_device_type;
 extern struct device_type idxd_engine_device_type;
 extern struct device_type idxd_group_device_type;
 
-static inline bool is_dsa_dev(struct device *dev)
+static inline bool is_dsa_dev(struct idxd_dev *idxd_dev)
 {
-       return dev->type == &dsa_device_type;
+       return idxd_dev->type == IDXD_DEV_DSA;
 }
 
-static inline bool is_iax_dev(struct device *dev)
+static inline bool is_iax_dev(struct idxd_dev *idxd_dev)
 {
-       return dev->type == &iax_device_type;
+       return idxd_dev->type == IDXD_DEV_IAX;
 }
 
-static inline bool is_idxd_dev(struct device *dev)
+static inline bool is_idxd_dev(struct idxd_dev *idxd_dev)
 {
-       return is_dsa_dev(dev) || is_iax_dev(dev);
+       return is_dsa_dev(idxd_dev) || is_iax_dev(idxd_dev);
 }
 
-static inline bool is_idxd_wq_dev(struct device *dev)
+static inline bool is_idxd_wq_dev(struct idxd_dev *idxd_dev)
 {
-       return dev->type == &idxd_wq_device_type;
+       return idxd_dev->type == IDXD_DEV_WQ;
 }
 
 static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
@@ -343,11 +419,16 @@ static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
        return false;
 }
 
-static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+static inline bool is_idxd_wq_user(struct idxd_wq *wq)
 {
        return wq->type == IDXD_WQT_USER;
 }
 
+static inline bool is_idxd_wq_kernel(struct idxd_wq *wq)
+{
+       return wq->type == IDXD_WQT_KERNEL;
+}
+
 static inline bool wq_dedicated(struct idxd_wq *wq)
 {
        return test_bit(WQ_FLAG_DEDICATED, &wq->flags);
@@ -389,6 +470,24 @@ static inline int idxd_get_wq_portal_full_offset(int wq_id,
        return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
 }
 
+#define IDXD_PORTAL_MASK       (PAGE_SIZE - 1)
+
+/*
+ * Even though this function can be accessed by multiple threads, it is safe to use.
+ * At worst the address gets used more than once before it gets incremented. We don't
+ * hit that threshold until the IOPS rate reaches many millions per second. So the
+ * occasional reuse of the same address is tolerable compared to using an atomic
+ * variable. This is safe on a system that has atomic load/store for 32-bit integers,
+ * which, given that this is an Intel iEP device, should not be a problem.
+ */
+static inline void __iomem *idxd_wq_portal_addr(struct idxd_wq *wq)
+{
+       int ofs = wq->portal_offset;
+
+       wq->portal_offset = (ofs + sizeof(struct dsa_raw_desc)) & IDXD_PORTAL_MASK;
+       return wq->portal + ofs;
+}
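
As an illustration of the rotation above, a standalone sketch of the offset arithmetic, assuming a 64-byte struct dsa_raw_desc and a 4 KiB portal page; the figures are for explanation only.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t ofs = 0;
	int i;

	for (i = 0; i < 5; i++) {
		printf("submit at portal + %u\n", ofs);	/* 0, 64, 128, 192, 256 */
		ofs = (ofs + 64) & (4096 - 1);		/* wraps back to 0 at the page end */
	}
	return 0;
}
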
+
 static inline void idxd_wq_get(struct idxd_wq *wq)
 {
        wq->client_count++;
@@ -404,6 +503,16 @@ static inline int idxd_wq_refcount(struct idxd_wq *wq)
        return wq->client_count;
 };
 
+int __must_check __idxd_driver_register(struct idxd_device_driver *idxd_drv,
+                                       struct module *module, const char *mod_name);
+#define idxd_driver_register(driver) \
+       __idxd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+
+void idxd_driver_unregister(struct idxd_device_driver *idxd_drv);
+
+#define module_idxd_driver(__idxd_driver) \
+       module_driver(__idxd_driver, idxd_driver_register, idxd_driver_unregister)
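
For context, a hedged sketch of how a hypothetical sub-driver would plug into this interface, mirroring the dmaengine driver earlier in this patch; the example names are illustrative and not part of the change.

/* Hypothetical sub-driver; only fields declared in struct idxd_device_driver are used. */
static int example_drv_probe(struct idxd_dev *idxd_dev)
{
	return 0;
}

static void example_drv_remove(struct idxd_dev *idxd_dev)
{
}

static enum idxd_dev_type example_dev_types[] = {
	IDXD_DEV_WQ,
	IDXD_DEV_NONE,
};

static struct idxd_device_driver example_drv = {
	.name = "example",
	.type = example_dev_types,
	.probe = example_drv_probe,
	.remove = example_drv_remove,
};

module_idxd_driver(example_drv);
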
+
 int idxd_register_bus_type(void);
 void idxd_unregister_bus_type(void);
 int idxd_register_devices(struct idxd_device *idxd);
@@ -424,13 +533,20 @@ void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
 void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
 
 /* device control */
+int idxd_register_idxd_drv(void);
+void idxd_unregister_idxd_drv(void);
+int idxd_device_drv_probe(struct idxd_dev *idxd_dev);
+void idxd_device_drv_remove(struct idxd_dev *idxd_dev);
+int drv_enable_wq(struct idxd_wq *wq);
+int __drv_enable_wq(struct idxd_wq *wq);
+void drv_disable_wq(struct idxd_wq *wq);
+void __drv_disable_wq(struct idxd_wq *wq);
 int idxd_device_init_reset(struct idxd_device *idxd);
 int idxd_device_enable(struct idxd_device *idxd);
 int idxd_device_disable(struct idxd_device *idxd);
 void idxd_device_reset(struct idxd_device *idxd);
-void idxd_device_cleanup(struct idxd_device *idxd);
+void idxd_device_clear_state(struct idxd_device *idxd);
 int idxd_device_config(struct idxd_device *idxd);
-void idxd_device_wqs_clear_state(struct idxd_device *idxd);
 void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
 int idxd_device_load_config(struct idxd_device *idxd);
 int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
@@ -443,12 +559,11 @@ void idxd_wqs_unmap_portal(struct idxd_device *idxd);
 int idxd_wq_alloc_resources(struct idxd_wq *wq);
 void idxd_wq_free_resources(struct idxd_wq *wq);
 int idxd_wq_enable(struct idxd_wq *wq);
-int idxd_wq_disable(struct idxd_wq *wq);
+int idxd_wq_disable(struct idxd_wq *wq, bool reset_config);
 void idxd_wq_drain(struct idxd_wq *wq);
 void idxd_wq_reset(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
-void idxd_wq_disable_cleanup(struct idxd_wq *wq);
 int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
 int idxd_wq_disable_pasid(struct idxd_wq *wq);
 void idxd_wq_quiesce(struct idxd_wq *wq);
index c0f4c04..eb09bc5 100644 (file)
 MODULE_VERSION(IDXD_DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
+MODULE_IMPORT_NS(IDXD);
 
 static bool sva = true;
 module_param(sva, bool, 0644);
 MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
 
+bool tc_override;
+module_param(tc_override, bool, 0644);
+MODULE_PARM_DESC(tc_override, "Override traffic class defaults");
+
 #define DRV_NAME "idxd"
 
 bool support_enqcmd;
@@ -200,6 +205,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
        struct idxd_wq *wq;
+       struct device *conf_dev;
        int i, rc;
 
        idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
@@ -214,15 +220,17 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
                        goto err;
                }
 
+               idxd_dev_set_type(&wq->idxd_dev, IDXD_DEV_WQ);
+               conf_dev = wq_confdev(wq);
                wq->id = i;
                wq->idxd = idxd;
-               device_initialize(&wq->conf_dev);
-               wq->conf_dev.parent = &idxd->conf_dev;
-               wq->conf_dev.bus = &dsa_bus_type;
-               wq->conf_dev.type = &idxd_wq_device_type;
-               rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+               device_initialize(wq_confdev(wq));
+               conf_dev->parent = idxd_confdev(idxd);
+               conf_dev->bus = &dsa_bus_type;
+               conf_dev->type = &idxd_wq_device_type;
+               rc = dev_set_name(conf_dev, "wq%d.%d", idxd->id, wq->id);
                if (rc < 0) {
-                       put_device(&wq->conf_dev);
+                       put_device(conf_dev);
                        goto err;
                }
 
@@ -233,7 +241,7 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
                wq->max_batch_size = idxd->max_batch_size;
                wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
                if (!wq->wqcfg) {
-                       put_device(&wq->conf_dev);
+                       put_device(conf_dev);
                        rc = -ENOMEM;
                        goto err;
                }
@@ -243,8 +251,11 @@ static int idxd_setup_wqs(struct idxd_device *idxd)
        return 0;
 
  err:
-       while (--i >= 0)
-               put_device(&idxd->wqs[i]->conf_dev);
+       while (--i >= 0) {
+               wq = idxd->wqs[i];
+               conf_dev = wq_confdev(wq);
+               put_device(conf_dev);
+       }
        return rc;
 }
 
@@ -252,6 +263,7 @@ static int idxd_setup_engines(struct idxd_device *idxd)
 {
        struct idxd_engine *engine;
        struct device *dev = &idxd->pdev->dev;
+       struct device *conf_dev;
        int i, rc;
 
        idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
@@ -266,15 +278,17 @@ static int idxd_setup_engines(struct idxd_device *idxd)
                        goto err;
                }
 
+               idxd_dev_set_type(&engine->idxd_dev, IDXD_DEV_ENGINE);
+               conf_dev = engine_confdev(engine);
                engine->id = i;
                engine->idxd = idxd;
-               device_initialize(&engine->conf_dev);
-               engine->conf_dev.parent = &idxd->conf_dev;
-               engine->conf_dev.bus = &dsa_bus_type;
-               engine->conf_dev.type = &idxd_engine_device_type;
-               rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
+               device_initialize(conf_dev);
+               conf_dev->parent = idxd_confdev(idxd);
+               conf_dev->bus = &dsa_bus_type;
+               conf_dev->type = &idxd_engine_device_type;
+               rc = dev_set_name(conf_dev, "engine%d.%d", idxd->id, engine->id);
                if (rc < 0) {
-                       put_device(&engine->conf_dev);
+                       put_device(conf_dev);
                        goto err;
                }
 
@@ -284,14 +298,18 @@ static int idxd_setup_engines(struct idxd_device *idxd)
        return 0;
 
  err:
-       while (--i >= 0)
-               put_device(&idxd->engines[i]->conf_dev);
+       while (--i >= 0) {
+               engine = idxd->engines[i];
+               conf_dev = engine_confdev(engine);
+               put_device(conf_dev);
+       }
        return rc;
 }
 
 static int idxd_setup_groups(struct idxd_device *idxd)
 {
        struct device *dev = &idxd->pdev->dev;
+       struct device *conf_dev;
        struct idxd_group *group;
        int i, rc;
 
@@ -307,28 +325,37 @@ static int idxd_setup_groups(struct idxd_device *idxd)
                        goto err;
                }
 
+               idxd_dev_set_type(&group->idxd_dev, IDXD_DEV_GROUP);
+               conf_dev = group_confdev(group);
                group->id = i;
                group->idxd = idxd;
-               device_initialize(&group->conf_dev);
-               group->conf_dev.parent = &idxd->conf_dev;
-               group->conf_dev.bus = &dsa_bus_type;
-               group->conf_dev.type = &idxd_group_device_type;
-               rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
+               device_initialize(conf_dev);
+               conf_dev->parent = idxd_confdev(idxd);
+               conf_dev->bus = &dsa_bus_type;
+               conf_dev->type = &idxd_group_device_type;
+               rc = dev_set_name(conf_dev, "group%d.%d", idxd->id, group->id);
                if (rc < 0) {
-                       put_device(&group->conf_dev);
+                       put_device(conf_dev);
                        goto err;
                }
 
                idxd->groups[i] = group;
-               group->tc_a = -1;
-               group->tc_b = -1;
+               if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override) {
+                       group->tc_a = 1;
+                       group->tc_b = 1;
+               } else {
+                       group->tc_a = -1;
+                       group->tc_b = -1;
+               }
        }
 
        return 0;
 
  err:
-       while (--i >= 0)
-               put_device(&idxd->groups[i]->conf_dev);
+       while (--i >= 0) {
+               group = idxd->groups[i];
+               put_device(group_confdev(group));
+       }
        return rc;
 }
 
@@ -337,11 +364,11 @@ static void idxd_cleanup_internals(struct idxd_device *idxd)
        int i;
 
        for (i = 0; i < idxd->max_groups; i++)
-               put_device(&idxd->groups[i]->conf_dev);
+               put_device(group_confdev(idxd->groups[i]));
        for (i = 0; i < idxd->max_engines; i++)
-               put_device(&idxd->engines[i]->conf_dev);
+               put_device(engine_confdev(idxd->engines[i]));
        for (i = 0; i < idxd->max_wqs; i++)
-               put_device(&idxd->wqs[i]->conf_dev);
+               put_device(wq_confdev(idxd->wqs[i]));
        destroy_workqueue(idxd->wq);
 }
 
@@ -381,13 +408,13 @@ static int idxd_setup_internals(struct idxd_device *idxd)
 
  err_wkq_create:
        for (i = 0; i < idxd->max_groups; i++)
-               put_device(&idxd->groups[i]->conf_dev);
+               put_device(group_confdev(idxd->groups[i]));
  err_group:
        for (i = 0; i < idxd->max_engines; i++)
-               put_device(&idxd->engines[i]->conf_dev);
+               put_device(engine_confdev(idxd->engines[i]));
  err_engine:
        for (i = 0; i < idxd->max_wqs; i++)
-               put_device(&idxd->wqs[i]->conf_dev);
+               put_device(wq_confdev(idxd->wqs[i]));
  err_wqs:
        kfree(idxd->int_handles);
        return rc;
@@ -469,6 +496,7 @@ static void idxd_read_caps(struct idxd_device *idxd)
 static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
 {
        struct device *dev = &pdev->dev;
+       struct device *conf_dev;
        struct idxd_device *idxd;
        int rc;
 
@@ -476,19 +504,21 @@ static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_d
        if (!idxd)
                return NULL;
 
+       conf_dev = idxd_confdev(idxd);
        idxd->pdev = pdev;
        idxd->data = data;
+       idxd_dev_set_type(&idxd->idxd_dev, idxd->data->type);
        idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
        if (idxd->id < 0)
                return NULL;
 
-       device_initialize(&idxd->conf_dev);
-       idxd->conf_dev.parent = dev;
-       idxd->conf_dev.bus = &dsa_bus_type;
-       idxd->conf_dev.type = idxd->data->dev_type;
-       rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+       device_initialize(conf_dev);
+       conf_dev->parent = dev;
+       conf_dev->bus = &dsa_bus_type;
+       conf_dev->type = idxd->data->dev_type;
+       rc = dev_set_name(conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
        if (rc < 0) {
-               put_device(&idxd->conf_dev);
+               put_device(conf_dev);
                return NULL;
        }
 
@@ -639,15 +669,9 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        }
 
        dev_dbg(dev, "Set DMA masks\n");
-       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
+       rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
        if (rc)
-               rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
-       if (rc)
-               goto err;
-
-       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-       if (rc)
-               rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
+               rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (rc)
                goto err;
 
@@ -668,8 +692,6 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_dev_register;
        }
 
-       idxd->state = IDXD_DEV_CONF_READY;
-
        dev_info(&pdev->dev, "Intel(R) Accelerator Device (v%x)\n",
                 idxd->hw.version);
 
@@ -680,7 +702,7 @@ static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  err:
        pci_iounmap(pdev, idxd->reg_base);
  err_iomap:
-       put_device(&idxd->conf_dev);
+       put_device(idxd_confdev(idxd));
  err_idxd_alloc:
        pci_disable_device(pdev);
        return rc;
@@ -793,7 +815,7 @@ static void idxd_remove(struct pci_dev *pdev)
        pci_disable_device(pdev);
        destroy_workqueue(idxd->wq);
        perfmon_pmu_remove(idxd);
-       device_unregister(&idxd->conf_dev);
+       device_unregister(idxd_confdev(idxd));
 }
 
 static struct pci_driver idxd_pci_driver = {
@@ -824,13 +846,17 @@ static int __init idxd_init_module(void)
 
        perfmon_init();
 
-       err = idxd_register_bus_type();
+       err = idxd_driver_register(&idxd_drv);
        if (err < 0)
-               return err;
+               goto err_idxd_driver_register;
 
-       err = idxd_register_driver();
+       err = idxd_driver_register(&idxd_dmaengine_drv);
        if (err < 0)
-               goto err_idxd_driver_register;
+               goto err_idxd_dmaengine_driver_register;
+
+       err = idxd_driver_register(&idxd_user_drv);
+       if (err < 0)
+               goto err_idxd_user_driver_register;
 
        err = idxd_cdev_register();
        if (err)
@@ -845,19 +871,23 @@ static int __init idxd_init_module(void)
 err_pci_register:
        idxd_cdev_remove();
 err_cdev_register:
-       idxd_unregister_driver();
+       idxd_driver_unregister(&idxd_user_drv);
+err_idxd_user_driver_register:
+       idxd_driver_unregister(&idxd_dmaengine_drv);
+err_idxd_dmaengine_driver_register:
+       idxd_driver_unregister(&idxd_drv);
 err_idxd_driver_register:
-       idxd_unregister_bus_type();
        return err;
 }
 module_init(idxd_init_module);
 
 static void __exit idxd_exit_module(void)
 {
-       idxd_unregister_driver();
+       idxd_driver_unregister(&idxd_user_drv);
+       idxd_driver_unregister(&idxd_dmaengine_drv);
+       idxd_driver_unregister(&idxd_drv);
        pci_unregister_driver(&idxd_pci_driver);
        idxd_cdev_remove();
-       idxd_unregister_bus_type();
        perfmon_exit();
 }
 module_exit(idxd_exit_module);
index 4e3a719..ca88fa7 100644 (file)
@@ -22,13 +22,6 @@ struct idxd_fault {
        struct idxd_device *idxd;
 };
 
-static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
-                                enum irq_work_type wtype,
-                                int *processed, u64 data);
-static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
-                                    enum irq_work_type wtype,
-                                    int *processed, u64 data);
-
 static void idxd_device_reinit(struct work_struct *work)
 {
        struct idxd_device *idxd = container_of(work, struct idxd_device, work);
@@ -51,7 +44,7 @@ static void idxd_device_reinit(struct work_struct *work)
                        rc = idxd_wq_enable(wq);
                        if (rc < 0) {
                                dev_warn(dev, "Unable to re-enable wq %s\n",
-                                        dev_name(&wq->conf_dev));
+                                        dev_name(wq_confdev(wq)));
                        }
                }
        }
@@ -59,47 +52,7 @@ static void idxd_device_reinit(struct work_struct *work)
        return;
 
  out:
-       idxd_device_wqs_clear_state(idxd);
-}
-
-static void idxd_device_fault_work(struct work_struct *work)
-{
-       struct idxd_fault *fault = container_of(work, struct idxd_fault, work);
-       struct idxd_irq_entry *ie;
-       int i;
-       int processed;
-       int irqcnt = fault->idxd->num_wq_irqs + 1;
-
-       for (i = 1; i < irqcnt; i++) {
-               ie = &fault->idxd->irq_entries[i];
-               irq_process_work_list(ie, IRQ_WORK_PROCESS_FAULT,
-                                     &processed, fault->addr);
-               if (processed)
-                       break;
-
-               irq_process_pending_llist(ie, IRQ_WORK_PROCESS_FAULT,
-                                         &processed, fault->addr);
-               if (processed)
-                       break;
-       }
-
-       kfree(fault);
-}
-
-static int idxd_device_schedule_fault_process(struct idxd_device *idxd,
-                                             u64 fault_addr)
-{
-       struct idxd_fault *fault;
-
-       fault = kmalloc(sizeof(*fault), GFP_ATOMIC);
-       if (!fault)
-               return -ENOMEM;
-
-       fault->addr = fault_addr;
-       fault->idxd = idxd;
-       INIT_WORK(&fault->work, idxd_device_fault_work);
-       queue_work(idxd->wq, &fault->work);
-       return 0;
+       idxd_device_clear_state(idxd);
 }
 
 static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
@@ -111,7 +64,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
        bool err = false;
 
        if (cause & IDXD_INTC_ERR) {
-               spin_lock_bh(&idxd->dev_lock);
+               spin_lock(&idxd->dev_lock);
                for (i = 0; i < 4; i++)
                        idxd->sw_err.bits[i] = ioread64(idxd->reg_base +
                                        IDXD_SWERR_OFFSET + i * sizeof(u64));
@@ -136,7 +89,7 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
                        }
                }
 
-               spin_unlock_bh(&idxd->dev_lock);
+               spin_unlock(&idxd->dev_lock);
                val |= IDXD_INTC_ERR;
 
                for (i = 0; i < 4; i++)
@@ -168,15 +121,6 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
        if (!err)
                return 0;
 
-       /*
-        * This case should rarely happen and typically is due to software
-        * programming error by the driver.
-        */
-       if (idxd->sw_err.valid &&
-           idxd->sw_err.desc_valid &&
-           idxd->sw_err.fault_addr)
-               idxd_device_schedule_fault_process(idxd, idxd->sw_err.fault_addr);
-
        gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
        if (gensts.state == IDXD_DEVICE_STATE_HALT) {
                idxd->state = IDXD_DEV_HALTED;
@@ -189,15 +133,15 @@ static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
                        INIT_WORK(&idxd->work, idxd_device_reinit);
                        queue_work(idxd->wq, &idxd->work);
                } else {
-                       spin_lock_bh(&idxd->dev_lock);
+                       spin_lock(&idxd->dev_lock);
                        idxd_wqs_quiesce(idxd);
                        idxd_wqs_unmap_portal(idxd);
-                       idxd_device_wqs_clear_state(idxd);
+                       idxd_device_clear_state(idxd);
                        dev_err(&idxd->pdev->dev,
                                "idxd halted, need %s.\n",
                                gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
                                "FLR" : "system reset");
-                       spin_unlock_bh(&idxd->dev_lock);
+                       spin_unlock(&idxd->dev_lock);
                        return -ENXIO;
                }
        }
@@ -228,127 +172,79 @@ irqreturn_t idxd_misc_thread(int vec, void *data)
        return IRQ_HANDLED;
 }
 
-static inline bool match_fault(struct idxd_desc *desc, u64 fault_addr)
-{
-       /*
-        * Completion address can be bad as well. Check fault address match for descriptor
-        * and completion address.
-        */
-       if ((u64)desc->hw == fault_addr || (u64)desc->completion == fault_addr) {
-               struct idxd_device *idxd = desc->wq->idxd;
-               struct device *dev = &idxd->pdev->dev;
-
-               dev_warn(dev, "desc with fault address: %#llx\n", fault_addr);
-               return true;
-       }
-
-       return false;
-}
-
-static int irq_process_pending_llist(struct idxd_irq_entry *irq_entry,
-                                    enum irq_work_type wtype,
-                                    int *processed, u64 data)
+static void irq_process_pending_llist(struct idxd_irq_entry *irq_entry)
 {
        struct idxd_desc *desc, *t;
        struct llist_node *head;
-       int queued = 0;
-       unsigned long flags;
-       enum idxd_complete_type reason;
 
-       *processed = 0;
        head = llist_del_all(&irq_entry->pending_llist);
        if (!head)
-               goto out;
-
-       if (wtype == IRQ_WORK_NORMAL)
-               reason = IDXD_COMPLETE_NORMAL;
-       else
-               reason = IDXD_COMPLETE_DEV_FAIL;
+               return;
 
        llist_for_each_entry_safe(desc, t, head, llnode) {
                u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
 
                if (status) {
-                       if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+                       /*
+                        * Check against the original status because ABORT is software defined
+                        * as 0xff, which DSA_COMP_STATUS_MASK would otherwise mask out.
+                        */
+                       if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
                                complete_desc(desc, IDXD_COMPLETE_ABORT);
-                               (*processed)++;
                                continue;
                        }
 
-                       if (unlikely(status != DSA_COMP_SUCCESS))
-                               match_fault(desc, data);
-                       complete_desc(desc, reason);
-                       (*processed)++;
+                       complete_desc(desc, IDXD_COMPLETE_NORMAL);
                } else {
-                       spin_lock_irqsave(&irq_entry->list_lock, flags);
+                       spin_lock(&irq_entry->list_lock);
                        list_add_tail(&desc->list,
                                      &irq_entry->work_list);
-                       spin_unlock_irqrestore(&irq_entry->list_lock, flags);
-                       queued++;
+                       spin_unlock(&irq_entry->list_lock);
                }
        }
-
- out:
-       return queued;
 }
 
-static int irq_process_work_list(struct idxd_irq_entry *irq_entry,
-                                enum irq_work_type wtype,
-                                int *processed, u64 data)
+static void irq_process_work_list(struct idxd_irq_entry *irq_entry)
 {
-       int queued = 0;
-       unsigned long flags;
        LIST_HEAD(flist);
        struct idxd_desc *desc, *n;
-       enum idxd_complete_type reason;
-
-       *processed = 0;
-       if (wtype == IRQ_WORK_NORMAL)
-               reason = IDXD_COMPLETE_NORMAL;
-       else
-               reason = IDXD_COMPLETE_DEV_FAIL;
 
        /*
         * This lock protects the list from corruption due to accesses outside of the irq
         * handler thread.
         */
-       spin_lock_irqsave(&irq_entry->list_lock, flags);
+       spin_lock(&irq_entry->list_lock);
        if (list_empty(&irq_entry->work_list)) {
-               spin_unlock_irqrestore(&irq_entry->list_lock, flags);
-               return 0;
+               spin_unlock(&irq_entry->list_lock);
+               return;
        }
 
        list_for_each_entry_safe(desc, n, &irq_entry->work_list, list) {
                if (desc->completion->status) {
                        list_del(&desc->list);
-                       (*processed)++;
                        list_add_tail(&desc->list, &flist);
-               } else {
-                       queued++;
                }
        }
 
-       spin_unlock_irqrestore(&irq_entry->list_lock, flags);
+       spin_unlock(&irq_entry->list_lock);
 
        list_for_each_entry(desc, &flist, list) {
-               u8 status = desc->completion->status & DSA_COMP_STATUS_MASK;
-
-               if (unlikely(status == IDXD_COMP_DESC_ABORT)) {
+               /*
+                * Check against the original status because ABORT is software defined
+                * as 0xff, which DSA_COMP_STATUS_MASK would otherwise mask out.
+                */
+               if (unlikely(desc->completion->status == IDXD_COMP_DESC_ABORT)) {
                        complete_desc(desc, IDXD_COMPLETE_ABORT);
                        continue;
                }
 
-               if (unlikely(status != DSA_COMP_SUCCESS))
-                       match_fault(desc, data);
-               complete_desc(desc, reason);
+               complete_desc(desc, IDXD_COMPLETE_NORMAL);
        }
-
-       return queued;
 }
 
-static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
+irqreturn_t idxd_wq_thread(int irq, void *data)
 {
-       int rc, processed, total = 0;
+       struct idxd_irq_entry *irq_entry = data;
 
        /*
         * There are two lists we are processing. The pending_llist is where
@@ -367,31 +263,9 @@ static int idxd_desc_process(struct idxd_irq_entry *irq_entry)
         *    and process the completed entries.
         * 4. If the entry is still waiting on hardware, list_add_tail() to
         *    the work_list.
-        * 5. Repeat until no more descriptors.
         */
-       do {
-               rc = irq_process_work_list(irq_entry, IRQ_WORK_NORMAL,
-                                          &processed, 0);
-               total += processed;
-               if (rc != 0)
-                       continue;
-
-               rc = irq_process_pending_llist(irq_entry, IRQ_WORK_NORMAL,
-                                              &processed, 0);
-               total += processed;
-       } while (rc != 0);
-
-       return total;
-}
-
-irqreturn_t idxd_wq_thread(int irq, void *data)
-{
-       struct idxd_irq_entry *irq_entry = data;
-       int processed;
-
-       processed = idxd_desc_process(irq_entry);
-       if (processed == 0)
-               return IRQ_NONE;
+       irq_process_work_list(irq_entry);
+       irq_process_pending_llist(irq_entry);
 
        return IRQ_HANDLED;
 }
index c970c3f..ffc7550 100644 (file)
@@ -7,6 +7,9 @@
 #define PCI_DEVICE_ID_INTEL_DSA_SPR0   0x0b25
 #define PCI_DEVICE_ID_INTEL_IAX_SPR0   0x0cfe
 
+#define DEVICE_VERSION_1               0x100
+#define DEVICE_VERSION_2               0x200
+
 #define IDXD_MMIO_BAR          0
 #define IDXD_WQ_BAR            2
 #define IDXD_PORTAL_SIZE       PAGE_SIZE
@@ -349,6 +352,9 @@ union wqcfg {
 } __packed;
 
 #define WQCFG_PASID_IDX                2
+#define WQCFG_OCCUP_IDX                6
+
+#define WQCFG_OCCUP_MASK       0xffff
 
 /*
  * This macro calculates the offset into the WQCFG register
index 36c9c1a..de76fb4 100644 (file)
@@ -22,21 +22,13 @@ static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
                desc->hw->pasid = idxd->pasid;
 
        /*
-        * Descriptor completion vectors are 1...N for MSIX. We will round
-        * robin through the N vectors.
+        * On the host, MSIX vector 0 is used for the misc interrupt. Therefore, when we
+        * match vectors 1:1 to WQ ids, we need to add 1.
         */
-       wq->vec_ptr = desc->vector = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
-       if (!idxd->int_handles) {
-               desc->hw->int_handle = wq->vec_ptr;
-       } else {
-               /*
-                * int_handles are only for descriptor completion. However for device
-                * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even
-                * though we are rotating through 1...N for descriptor interrupts, we
-                * need to acqurie the int_handles from 0..N-1.
-                */
-               desc->hw->int_handle = idxd->int_handles[desc->vector - 1];
-       }
+       if (!idxd->int_handles)
+               desc->hw->int_handle = wq->id + 1;
+       else
+               desc->hw->int_handle = idxd->int_handles[wq->id];
 
        return desc;
 }
@@ -67,7 +59,7 @@ struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
                if (signal_pending_state(TASK_INTERRUPTIBLE, current))
                        break;
                idx = sbitmap_queue_get(sbq, &cpu);
-               if (idx > 0)
+               if (idx >= 0)
                        break;
                schedule();
        }
@@ -114,14 +106,13 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 {
        struct idxd_desc *d, *t, *found = NULL;
        struct llist_node *head;
-       unsigned long flags;
 
        desc->completion->status = IDXD_COMP_DESC_ABORT;
        /*
         * Grab the list lock so it will block the irq thread handler. This allows the
         * abort code to locate the descriptor need to be aborted.
         */
-       spin_lock_irqsave(&ie->list_lock, flags);
+       spin_lock(&ie->list_lock);
        head = llist_del_all(&ie->pending_llist);
        if (head) {
                llist_for_each_entry_safe(d, t, head, llnode) {
@@ -135,7 +126,7 @@ static void llist_abort_desc(struct idxd_wq *wq, struct idxd_irq_entry *ie,
 
        if (!found)
                found = list_abort_desc(wq, ie, desc);
-       spin_unlock_irqrestore(&ie->list_lock, flags);
+       spin_unlock(&ie->list_lock);
 
        if (found)
                complete_desc(found, IDXD_COMPLETE_ABORT);
@@ -148,13 +139,17 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
        void __iomem *portal;
        int rc;
 
-       if (idxd->state != IDXD_DEV_ENABLED)
+       if (idxd->state != IDXD_DEV_ENABLED) {
+               idxd_free_desc(wq, desc);
                return -EIO;
+       }
 
-       if (!percpu_ref_tryget_live(&wq->wq_active))
+       if (!percpu_ref_tryget_live(&wq->wq_active)) {
+               idxd_free_desc(wq, desc);
                return -ENXIO;
+       }
 
-       portal = wq->portal;
+       portal = idxd_wq_portal_addr(wq);
 
        /*
         * The wmb() flushes writes to coherent DMA data before
@@ -168,7 +163,7 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
         * that we designated the descriptor to.
         */
        if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
-               ie = &idxd->irq_entries[desc->vector];
+               ie = &idxd->irq_entries[wq->id + 1];
                llist_add(&desc->llnode, &ie->pending_llist);
        }
 
@@ -183,8 +178,12 @@ int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
                 */
                rc = enqcmds(portal, desc->hw);
                if (rc < 0) {
+                       percpu_ref_put(&wq->wq_active);
+                       /* abort operation frees the descriptor */
                        if (ie)
                                llist_abort_desc(wq, ie, desc);
+                       else
+                               idxd_free_desc(wq, desc);
                        return rc;
                }
        }
index 26d8ff9..a9025be 100644 (file)
@@ -16,336 +16,11 @@ static char *idxd_wq_type_names[] = {
        [IDXD_WQT_USER]         = "user",
 };
 
-static int idxd_config_bus_match(struct device *dev,
-                                struct device_driver *drv)
-{
-       int matched = 0;
-
-       if (is_idxd_dev(dev)) {
-               struct idxd_device *idxd = confdev_to_idxd(dev);
-
-               if (idxd->state != IDXD_DEV_CONF_READY)
-                       return 0;
-               matched = 1;
-       } else if (is_idxd_wq_dev(dev)) {
-               struct idxd_wq *wq = confdev_to_wq(dev);
-               struct idxd_device *idxd = wq->idxd;
-
-               if (idxd->state < IDXD_DEV_CONF_READY)
-                       return 0;
-
-               if (wq->state != IDXD_WQ_DISABLED) {
-                       dev_dbg(dev, "%s not disabled\n", dev_name(dev));
-                       return 0;
-               }
-               matched = 1;
-       }
-
-       if (matched)
-               dev_dbg(dev, "%s matched\n", dev_name(dev));
-
-       return matched;
-}
-
-static int enable_wq(struct idxd_wq *wq)
-{
-       struct idxd_device *idxd = wq->idxd;
-       struct device *dev = &idxd->pdev->dev;
-       unsigned long flags;
-       int rc;
-
-       mutex_lock(&wq->wq_lock);
-
-       if (idxd->state != IDXD_DEV_ENABLED) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "Enabling while device not enabled.\n");
-               return -EPERM;
-       }
-
-       if (wq->state != IDXD_WQ_DISABLED) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "WQ %d already enabled.\n", wq->id);
-               return -EBUSY;
-       }
-
-       if (!wq->group) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "WQ not attached to group.\n");
-               return -EINVAL;
-       }
-
-       if (strlen(wq->name) == 0) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "WQ name not set.\n");
-               return -EINVAL;
-       }
-
-       /* Shared WQ checks */
-       if (wq_shared(wq)) {
-               if (!device_swq_supported(idxd)) {
-                       dev_warn(dev, "PASID not enabled and shared WQ.\n");
-                       mutex_unlock(&wq->wq_lock);
-                       return -ENXIO;
-               }
-               /*
-                * Shared wq with the threshold set to 0 means the user
-                * did not set the threshold or transitioned from a
-                * dedicated wq but did not set threshold. A value
-                * of 0 would effectively disable the shared wq. The
-                * driver does not allow a value of 0 to be set for
-                * threshold via sysfs.
-                */
-               if (wq->threshold == 0) {
-                       dev_warn(dev, "Shared WQ and threshold 0.\n");
-                       mutex_unlock(&wq->wq_lock);
-                       return -EINVAL;
-               }
-       }
-
-       rc = idxd_wq_alloc_resources(wq);
-       if (rc < 0) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "WQ resource alloc failed\n");
-               return rc;
-       }
-
-       spin_lock_irqsave(&idxd->dev_lock, flags);
-       if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
-               rc = idxd_device_config(idxd);
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
-       if (rc < 0) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc);
-               return rc;
-       }
-
-       rc = idxd_wq_enable(wq);
-       if (rc < 0) {
-               mutex_unlock(&wq->wq_lock);
-               dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc);
-               return rc;
-       }
-
-       rc = idxd_wq_map_portal(wq);
-       if (rc < 0) {
-               dev_warn(dev, "wq portal mapping failed: %d\n", rc);
-               rc = idxd_wq_disable(wq);
-               if (rc < 0)
-                       dev_warn(dev, "IDXD wq disable failed\n");
-               mutex_unlock(&wq->wq_lock);
-               return rc;
-       }
-
-       wq->client_count = 0;
-
-       if (wq->type == IDXD_WQT_KERNEL) {
-               rc = idxd_wq_init_percpu_ref(wq);
-               if (rc < 0) {
-                       dev_dbg(dev, "percpu_ref setup failed\n");
-                       mutex_unlock(&wq->wq_lock);
-                       return rc;
-               }
-       }
-
-       if (is_idxd_wq_dmaengine(wq)) {
-               rc = idxd_register_dma_channel(wq);
-               if (rc < 0) {
-                       dev_dbg(dev, "DMA channel register failed\n");
-                       mutex_unlock(&wq->wq_lock);
-                       return rc;
-               }
-       } else if (is_idxd_wq_cdev(wq)) {
-               rc = idxd_wq_add_cdev(wq);
-               if (rc < 0) {
-                       dev_dbg(dev, "Cdev creation failed\n");
-                       mutex_unlock(&wq->wq_lock);
-                       return rc;
-               }
-       }
-
-       mutex_unlock(&wq->wq_lock);
-       dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
-
-       return 0;
-}
-
-static int idxd_config_bus_probe(struct device *dev)
-{
-       int rc = 0;
-       unsigned long flags;
-
-       dev_dbg(dev, "%s called\n", __func__);
-
-       if (is_idxd_dev(dev)) {
-               struct idxd_device *idxd = confdev_to_idxd(dev);
-
-               if (idxd->state != IDXD_DEV_CONF_READY) {
-                       dev_warn(dev, "Device not ready for config\n");
-                       return -EBUSY;
-               }
-
-               if (!try_module_get(THIS_MODULE))
-                       return -ENXIO;
-
-               /* Perform IDXD configuration and enabling */
-               spin_lock_irqsave(&idxd->dev_lock, flags);
-               if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
-                       rc = idxd_device_config(idxd);
-               spin_unlock_irqrestore(&idxd->dev_lock, flags);
-               if (rc < 0) {
-                       module_put(THIS_MODULE);
-                       dev_warn(dev, "Device config failed: %d\n", rc);
-                       return rc;
-               }
-
-               /* start device */
-               rc = idxd_device_enable(idxd);
-               if (rc < 0) {
-                       module_put(THIS_MODULE);
-                       dev_warn(dev, "Device enable failed: %d\n", rc);
-                       return rc;
-               }
-
-               dev_info(dev, "Device %s enabled\n", dev_name(dev));
-
-               rc = idxd_register_dma_device(idxd);
-               if (rc < 0) {
-                       module_put(THIS_MODULE);
-                       dev_dbg(dev, "Failed to register dmaengine device\n");
-                       return rc;
-               }
-               return 0;
-       } else if (is_idxd_wq_dev(dev)) {
-               struct idxd_wq *wq = confdev_to_wq(dev);
-
-               return enable_wq(wq);
-       }
-
-       return -ENODEV;
-}
-
-static void disable_wq(struct idxd_wq *wq)
-{
-       struct idxd_device *idxd = wq->idxd;
-       struct device *dev = &idxd->pdev->dev;
-
-       mutex_lock(&wq->wq_lock);
-       dev_dbg(dev, "%s removing WQ %s\n", __func__, dev_name(&wq->conf_dev));
-       if (wq->state == IDXD_WQ_DISABLED) {
-               mutex_unlock(&wq->wq_lock);
-               return;
-       }
-
-       if (wq->type == IDXD_WQT_KERNEL)
-               idxd_wq_quiesce(wq);
-
-       if (is_idxd_wq_dmaengine(wq))
-               idxd_unregister_dma_channel(wq);
-       else if (is_idxd_wq_cdev(wq))
-               idxd_wq_del_cdev(wq);
-
-       if (idxd_wq_refcount(wq))
-               dev_warn(dev, "Clients has claim on wq %d: %d\n",
-                        wq->id, idxd_wq_refcount(wq));
-
-       idxd_wq_unmap_portal(wq);
-
-       idxd_wq_drain(wq);
-       idxd_wq_reset(wq);
-
-       idxd_wq_free_resources(wq);
-       wq->client_count = 0;
-       mutex_unlock(&wq->wq_lock);
-
-       dev_info(dev, "wq %s disabled\n", dev_name(&wq->conf_dev));
-}
-
-static void idxd_config_bus_remove(struct device *dev)
-{
-       int rc;
-
-       dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev));
-
-       /* disable workqueue here */
-       if (is_idxd_wq_dev(dev)) {
-               struct idxd_wq *wq = confdev_to_wq(dev);
-
-               disable_wq(wq);
-       } else if (is_idxd_dev(dev)) {
-               struct idxd_device *idxd = confdev_to_idxd(dev);
-               int i;
-
-               dev_dbg(dev, "%s removing dev %s\n", __func__,
-                       dev_name(&idxd->conf_dev));
-               for (i = 0; i < idxd->max_wqs; i++) {
-                       struct idxd_wq *wq = idxd->wqs[i];
-
-                       if (wq->state == IDXD_WQ_DISABLED)
-                               continue;
-                       dev_warn(dev, "Active wq %d on disable %s.\n", i,
-                                dev_name(&idxd->conf_dev));
-                       device_release_driver(&wq->conf_dev);
-               }
-
-               idxd_unregister_dma_device(idxd);
-               rc = idxd_device_disable(idxd);
-               if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
-                       for (i = 0; i < idxd->max_wqs; i++) {
-                               struct idxd_wq *wq = idxd->wqs[i];
-
-                               mutex_lock(&wq->wq_lock);
-                               idxd_wq_disable_cleanup(wq);
-                               mutex_unlock(&wq->wq_lock);
-                       }
-               }
-               module_put(THIS_MODULE);
-               if (rc < 0)
-                       dev_warn(dev, "Device disable failed\n");
-               else
-                       dev_info(dev, "Device %s disabled\n", dev_name(dev));
-
-       }
-}
-
-static void idxd_config_bus_shutdown(struct device *dev)
-{
-       dev_dbg(dev, "%s called\n", __func__);
-}
-
-struct bus_type dsa_bus_type = {
-       .name = "dsa",
-       .match = idxd_config_bus_match,
-       .probe = idxd_config_bus_probe,
-       .remove = idxd_config_bus_remove,
-       .shutdown = idxd_config_bus_shutdown,
-};
-
-static struct idxd_device_driver dsa_drv = {
-       .drv = {
-               .name = "dsa",
-               .bus = &dsa_bus_type,
-               .owner = THIS_MODULE,
-               .mod_name = KBUILD_MODNAME,
-       },
-};
-
-/* IDXD generic driver setup */
-int idxd_register_driver(void)
-{
-       return driver_register(&dsa_drv.drv);
-}
-
-void idxd_unregister_driver(void)
-{
-       driver_unregister(&dsa_drv.drv);
-}
-
 /* IDXD engine attributes */
 static ssize_t engine_group_id_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
-       struct idxd_engine *engine =
-               container_of(dev, struct idxd_engine, conf_dev);
+       struct idxd_engine *engine = confdev_to_engine(dev);
 
        if (engine->group)
                return sysfs_emit(buf, "%d\n", engine->group->id);
@@ -357,8 +32,7 @@ static ssize_t engine_group_id_store(struct device *dev,
                                     struct device_attribute *attr,
                                     const char *buf, size_t count)
 {
-       struct idxd_engine *engine =
-               container_of(dev, struct idxd_engine, conf_dev);
+       struct idxd_engine *engine = confdev_to_engine(dev);
        struct idxd_device *idxd = engine->idxd;
        long id;
        int rc;
@@ -412,7 +86,7 @@ static const struct attribute_group *idxd_engine_attribute_groups[] = {
 
 static void idxd_conf_engine_release(struct device *dev)
 {
-       struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev);
+       struct idxd_engine *engine = confdev_to_engine(dev);
 
        kfree(engine);
 }
@@ -442,8 +116,7 @@ static ssize_t group_tokens_reserved_show(struct device *dev,
                                          struct device_attribute *attr,
                                          char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        return sysfs_emit(buf, "%u\n", group->tokens_reserved);
 }
@@ -452,8 +125,7 @@ static ssize_t group_tokens_reserved_store(struct device *dev,
                                           struct device_attribute *attr,
                                           const char *buf, size_t count)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        struct idxd_device *idxd = group->idxd;
        unsigned long val;
        int rc;
@@ -490,8 +162,7 @@ static ssize_t group_tokens_allowed_show(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        return sysfs_emit(buf, "%u\n", group->tokens_allowed);
 }
@@ -500,8 +171,7 @@ static ssize_t group_tokens_allowed_store(struct device *dev,
                                          struct device_attribute *attr,
                                          const char *buf, size_t count)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        struct idxd_device *idxd = group->idxd;
        unsigned long val;
        int rc;
@@ -535,8 +205,7 @@ static ssize_t group_use_token_limit_show(struct device *dev,
                                          struct device_attribute *attr,
                                          char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        return sysfs_emit(buf, "%u\n", group->use_token_limit);
 }
@@ -545,8 +214,7 @@ static ssize_t group_use_token_limit_store(struct device *dev,
                                           struct device_attribute *attr,
                                           const char *buf, size_t count)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        struct idxd_device *idxd = group->idxd;
        unsigned long val;
        int rc;
@@ -578,8 +246,7 @@ static struct device_attribute dev_attr_group_use_token_limit =
 static ssize_t group_engines_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        int i, rc = 0;
        struct idxd_device *idxd = group->idxd;
 
@@ -607,8 +274,7 @@ static struct device_attribute dev_attr_group_engines =
 static ssize_t group_work_queues_show(struct device *dev,
                                      struct device_attribute *attr, char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        int i, rc = 0;
        struct idxd_device *idxd = group->idxd;
 
@@ -637,8 +303,7 @@ static ssize_t group_traffic_class_a_show(struct device *dev,
                                          struct device_attribute *attr,
                                          char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        return sysfs_emit(buf, "%d\n", group->tc_a);
 }
@@ -647,8 +312,7 @@ static ssize_t group_traffic_class_a_store(struct device *dev,
                                           struct device_attribute *attr,
                                           const char *buf, size_t count)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        struct idxd_device *idxd = group->idxd;
        long val;
        int rc;
@@ -663,6 +327,9 @@ static ssize_t group_traffic_class_a_store(struct device *dev,
        if (idxd->state == IDXD_DEV_ENABLED)
                return -EPERM;
 
+       if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
+               return -EPERM;
+
        if (val < 0 || val > 7)
                return -EINVAL;
 
@@ -678,8 +345,7 @@ static ssize_t group_traffic_class_b_show(struct device *dev,
                                          struct device_attribute *attr,
                                          char *buf)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        return sysfs_emit(buf, "%d\n", group->tc_b);
 }
@@ -688,8 +354,7 @@ static ssize_t group_traffic_class_b_store(struct device *dev,
                                           struct device_attribute *attr,
                                           const char *buf, size_t count)
 {
-       struct idxd_group *group =
-               container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
        struct idxd_device *idxd = group->idxd;
        long val;
        int rc;
@@ -704,6 +369,9 @@ static ssize_t group_traffic_class_b_store(struct device *dev,
        if (idxd->state == IDXD_DEV_ENABLED)
                return -EPERM;
 
+       if (idxd->hw.version < DEVICE_VERSION_2 && !tc_override)
+               return -EPERM;
+
        if (val < 0 || val > 7)
                return -EINVAL;
 
@@ -737,7 +405,7 @@ static const struct attribute_group *idxd_group_attribute_groups[] = {
 
 static void idxd_conf_group_release(struct device *dev)
 {
-       struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev);
+       struct idxd_group *group = confdev_to_group(dev);
 
        kfree(group);
 }
@@ -752,7 +420,7 @@ struct device_type idxd_group_device_type = {
 static ssize_t wq_clients_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%d\n", wq->client_count);
 }
@@ -763,7 +431,7 @@ static struct device_attribute dev_attr_wq_clients =
 static ssize_t wq_state_show(struct device *dev,
                             struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        switch (wq->state) {
        case IDXD_WQ_DISABLED:
@@ -781,7 +449,7 @@ static struct device_attribute dev_attr_wq_state =
 static ssize_t wq_group_id_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        if (wq->group)
                return sysfs_emit(buf, "%u\n", wq->group->id);
@@ -793,7 +461,7 @@ static ssize_t wq_group_id_store(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        long id;
        int rc;
@@ -836,7 +504,7 @@ static struct device_attribute dev_attr_wq_group_id =
 static ssize_t wq_mode_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared");
 }
@@ -845,7 +513,7 @@ static ssize_t wq_mode_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
 
        if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
@@ -872,7 +540,7 @@ static struct device_attribute dev_attr_wq_mode =
 static ssize_t wq_size_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", wq->size);
 }
@@ -895,7 +563,7 @@ static ssize_t wq_size_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        unsigned long size;
        struct idxd_device *idxd = wq->idxd;
        int rc;
@@ -923,7 +591,7 @@ static struct device_attribute dev_attr_wq_size =
 static ssize_t wq_priority_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", wq->priority);
 }
@@ -932,7 +600,7 @@ static ssize_t wq_priority_store(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        unsigned long prio;
        struct idxd_device *idxd = wq->idxd;
        int rc;
@@ -960,7 +628,7 @@ static struct device_attribute dev_attr_wq_priority =
 static ssize_t wq_block_on_fault_show(struct device *dev,
                                      struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
 }
@@ -969,11 +637,14 @@ static ssize_t wq_block_on_fault_store(struct device *dev,
                                       struct device_attribute *attr,
                                       const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        bool bof;
        int rc;
 
+       if (!idxd->hw.gen_cap.block_on_fault)
+               return -EOPNOTSUPP;
+
        if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
                return -EPERM;
 
@@ -999,7 +670,7 @@ static struct device_attribute dev_attr_wq_block_on_fault =
 static ssize_t wq_threshold_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", wq->threshold);
 }
@@ -1008,7 +679,7 @@ static ssize_t wq_threshold_store(struct device *dev,
                                  struct device_attribute *attr,
                                  const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        unsigned int val;
        int rc;
@@ -1040,7 +711,7 @@ static struct device_attribute dev_attr_wq_threshold =
 static ssize_t wq_type_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        switch (wq->type) {
        case IDXD_WQT_KERNEL:
@@ -1059,7 +730,7 @@ static ssize_t wq_type_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        enum idxd_wq_type old_type;
 
        if (wq->state != IDXD_WQ_DISABLED)
@@ -1088,7 +759,7 @@ static struct device_attribute dev_attr_wq_type =
 static ssize_t wq_name_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%s\n", wq->name);
 }
@@ -1097,7 +768,7 @@ static ssize_t wq_name_store(struct device *dev,
                             struct device_attribute *attr, const char *buf,
                             size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        if (wq->state != IDXD_WQ_DISABLED)
                return -EPERM;
@@ -1124,7 +795,7 @@ static struct device_attribute dev_attr_wq_name =
 static ssize_t wq_cdev_minor_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        int minor = -1;
 
        mutex_lock(&wq->wq_lock);
@@ -1158,7 +829,7 @@ static int __get_sysfs_u64(const char *buf, u64 *val)
 static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attribute *attr,
                                         char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes);
 }
@@ -1166,7 +837,7 @@ static ssize_t wq_max_transfer_size_show(struct device *dev, struct device_attri
 static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr,
                                          const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        u64 xfer_size;
        int rc;
@@ -1192,7 +863,7 @@ static struct device_attribute dev_attr_wq_max_transfer_size =
 
 static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", wq->max_batch_size);
 }
@@ -1200,7 +871,7 @@ static ssize_t wq_max_batch_size_show(struct device *dev, struct device_attribut
 static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr,
                                       const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        u64 batch_size;
        int rc;
@@ -1225,7 +896,7 @@ static struct device_attribute dev_attr_wq_max_batch_size =
 
 static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *attr, char *buf)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        return sysfs_emit(buf, "%u\n", wq->ats_dis);
 }
@@ -1233,7 +904,7 @@ static ssize_t wq_ats_disable_show(struct device *dev, struct device_attribute *
 static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
                                    const char *buf, size_t count)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
        struct idxd_device *idxd = wq->idxd;
        bool ats_dis;
        int rc;
@@ -1256,6 +927,24 @@ static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute
 static struct device_attribute dev_attr_wq_ats_disable =
                __ATTR(ats_disable, 0644, wq_ats_disable_show, wq_ats_disable_store);
 
+static ssize_t wq_occupancy_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct idxd_wq *wq = confdev_to_wq(dev);
+       struct idxd_device *idxd = wq->idxd;
+       u32 occup, offset;
+
+       if (!idxd->hw.wq_cap.occupancy)
+               return -EOPNOTSUPP;
+
+       offset = WQCFG_OFFSET(idxd, wq->id, WQCFG_OCCUP_IDX);
+       occup = ioread32(idxd->reg_base + offset) & WQCFG_OCCUP_MASK;
+
+       return sysfs_emit(buf, "%u\n", occup);
+}
+
+static struct device_attribute dev_attr_wq_occupancy =
+               __ATTR(occupancy, 0444, wq_occupancy_show, NULL);
+
 static struct attribute *idxd_wq_attributes[] = {
        &dev_attr_wq_clients.attr,
        &dev_attr_wq_state.attr,
@@ -1271,6 +960,7 @@ static struct attribute *idxd_wq_attributes[] = {
        &dev_attr_wq_max_transfer_size.attr,
        &dev_attr_wq_max_batch_size.attr,
        &dev_attr_wq_ats_disable.attr,
+       &dev_attr_wq_occupancy.attr,
        NULL,
 };
 
@@ -1285,7 +975,7 @@ static const struct attribute_group *idxd_wq_attribute_groups[] = {
 
 static void idxd_conf_wq_release(struct device *dev)
 {
-       struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+       struct idxd_wq *wq = confdev_to_wq(dev);
 
        kfree(wq->wqcfg);
        kfree(wq);
@@ -1301,8 +991,7 @@ struct device_type idxd_wq_device_type = {
 static ssize_t version_show(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%#x\n", idxd->hw.version);
 }
@@ -1312,8 +1001,7 @@ static ssize_t max_work_queues_size_show(struct device *dev,
                                         struct device_attribute *attr,
                                         char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_wq_size);
 }
@@ -1322,8 +1010,7 @@ static DEVICE_ATTR_RO(max_work_queues_size);
 static ssize_t max_groups_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_groups);
 }
@@ -1332,8 +1019,7 @@ static DEVICE_ATTR_RO(max_groups);
 static ssize_t max_work_queues_show(struct device *dev,
                                    struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_wqs);
 }
@@ -1342,8 +1028,7 @@ static DEVICE_ATTR_RO(max_work_queues);
 static ssize_t max_engines_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_engines);
 }
@@ -1352,8 +1037,7 @@ static DEVICE_ATTR_RO(max_engines);
 static ssize_t numa_node_show(struct device *dev,
                              struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
 }
@@ -1362,8 +1046,7 @@ static DEVICE_ATTR_RO(numa_node);
 static ssize_t max_batch_size_show(struct device *dev,
                                   struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_batch_size);
 }
@@ -1373,8 +1056,7 @@ static ssize_t max_transfer_size_show(struct device *dev,
                                      struct device_attribute *attr,
                                      char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes);
 }
@@ -1383,8 +1065,7 @@ static DEVICE_ATTR_RO(max_transfer_size);
 static ssize_t op_cap_show(struct device *dev,
                           struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
        int i, rc = 0;
 
        for (i = 0; i < 4; i++)
@@ -1399,8 +1080,7 @@ static DEVICE_ATTR_RO(op_cap);
 static ssize_t gen_cap_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits);
 }
@@ -1409,8 +1089,7 @@ static DEVICE_ATTR_RO(gen_cap);
 static ssize_t configurable_show(struct device *dev,
                                 struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
 }
@@ -1419,18 +1098,16 @@ static DEVICE_ATTR_RO(configurable);
 static ssize_t clients_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
-       unsigned long flags;
+       struct idxd_device *idxd = confdev_to_idxd(dev);
        int count = 0, i;
 
-       spin_lock_irqsave(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
        for (i = 0; i < idxd->max_wqs; i++) {
                struct idxd_wq *wq = idxd->wqs[i];
 
                count += wq->client_count;
        }
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_unlock(&idxd->dev_lock);
 
        return sysfs_emit(buf, "%d\n", count);
 }
@@ -1439,8 +1116,7 @@ static DEVICE_ATTR_RO(clients);
 static ssize_t pasid_enabled_show(struct device *dev,
                                  struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd));
 }
@@ -1449,12 +1125,10 @@ static DEVICE_ATTR_RO(pasid_enabled);
 static ssize_t state_show(struct device *dev,
                          struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        switch (idxd->state) {
        case IDXD_DEV_DISABLED:
-       case IDXD_DEV_CONF_READY:
                return sysfs_emit(buf, "disabled\n");
        case IDXD_DEV_ENABLED:
                return sysfs_emit(buf, "enabled\n");
@@ -1469,15 +1143,13 @@ static DEVICE_ATTR_RO(state);
 static ssize_t errors_show(struct device *dev,
                           struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
        int i, out = 0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&idxd->dev_lock, flags);
+       spin_lock(&idxd->dev_lock);
        for (i = 0; i < 4; i++)
                out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
-       spin_unlock_irqrestore(&idxd->dev_lock, flags);
+       spin_unlock(&idxd->dev_lock);
        out--;
        out += sysfs_emit_at(buf, out, "\n");
        return out;
@@ -1487,8 +1159,7 @@ static DEVICE_ATTR_RO(errors);
 static ssize_t max_tokens_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->max_tokens);
 }
@@ -1497,8 +1168,7 @@ static DEVICE_ATTR_RO(max_tokens);
 static ssize_t token_limit_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->token_limit);
 }
@@ -1507,8 +1177,7 @@ static ssize_t token_limit_store(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
        unsigned long val;
        int rc;
 
@@ -1536,8 +1205,7 @@ static DEVICE_ATTR_RW(token_limit);
 static ssize_t cdev_major_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd =
-               container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%u\n", idxd->major);
 }
@@ -1546,11 +1214,20 @@ static DEVICE_ATTR_RO(cdev_major);
 static ssize_t cmd_status_show(struct device *dev,
                               struct device_attribute *attr, char *buf)
 {
-       struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        return sysfs_emit(buf, "%#x\n", idxd->cmd_status);
 }
-static DEVICE_ATTR_RO(cmd_status);
+
+static ssize_t cmd_status_store(struct device *dev, struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct idxd_device *idxd = confdev_to_idxd(dev);
+
+       idxd->cmd_status = 0;
+       return count;
+}
+static DEVICE_ATTR_RW(cmd_status);
 
 static struct attribute *idxd_device_attributes[] = {
        &dev_attr_version.attr,
@@ -1586,7 +1263,7 @@ static const struct attribute_group *idxd_attribute_groups[] = {
 
 static void idxd_conf_device_release(struct device *dev)
 {
-       struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+       struct idxd_device *idxd = confdev_to_idxd(dev);
 
        kfree(idxd->groups);
        kfree(idxd->wqs);
@@ -1611,12 +1288,12 @@ struct device_type iax_device_type = {
 
 static int idxd_register_engine_devices(struct idxd_device *idxd)
 {
+       struct idxd_engine *engine;
        int i, j, rc;
 
        for (i = 0; i < idxd->max_engines; i++) {
-               struct idxd_engine *engine = idxd->engines[i];
-
-               rc = device_add(&engine->conf_dev);
+               engine = idxd->engines[i];
+               rc = device_add(engine_confdev(engine));
                if (rc < 0)
                        goto cleanup;
        }
@@ -1625,22 +1302,26 @@ static int idxd_register_engine_devices(struct idxd_device *idxd)
 
 cleanup:
        j = i - 1;
-       for (; i < idxd->max_engines; i++)
-               put_device(&idxd->engines[i]->conf_dev);
+       for (; i < idxd->max_engines; i++) {
+               engine = idxd->engines[i];
+               put_device(engine_confdev(engine));
+       }
 
-       while (j--)
-               device_unregister(&idxd->engines[j]->conf_dev);
+       while (j--) {
+               engine = idxd->engines[j];
+               device_unregister(engine_confdev(engine));
+       }
        return rc;
 }
 
 static int idxd_register_group_devices(struct idxd_device *idxd)
 {
+       struct idxd_group *group;
        int i, j, rc;
 
        for (i = 0; i < idxd->max_groups; i++) {
-               struct idxd_group *group = idxd->groups[i];
-
-               rc = device_add(&group->conf_dev);
+               group = idxd->groups[i];
+               rc = device_add(group_confdev(group));
                if (rc < 0)
                        goto cleanup;
        }
@@ -1649,22 +1330,26 @@ static int idxd_register_group_devices(struct idxd_device *idxd)
 
 cleanup:
        j = i - 1;
-       for (; i < idxd->max_groups; i++)
-               put_device(&idxd->groups[i]->conf_dev);
+       for (; i < idxd->max_groups; i++) {
+               group = idxd->groups[i];
+               put_device(group_confdev(group));
+       }
 
-       while (j--)
-               device_unregister(&idxd->groups[j]->conf_dev);
+       while (j--) {
+               group = idxd->groups[j];
+               device_unregister(group_confdev(group));
+       }
        return rc;
 }
 
 static int idxd_register_wq_devices(struct idxd_device *idxd)
 {
+       struct idxd_wq *wq;
        int i, rc, j;
 
        for (i = 0; i < idxd->max_wqs; i++) {
-               struct idxd_wq *wq = idxd->wqs[i];
-
-               rc = device_add(&wq->conf_dev);
+               wq = idxd->wqs[i];
+               rc = device_add(wq_confdev(wq));
                if (rc < 0)
                        goto cleanup;
        }
@@ -1673,11 +1358,15 @@ static int idxd_register_wq_devices(struct idxd_device *idxd)
 
 cleanup:
        j = i - 1;
-       for (; i < idxd->max_wqs; i++)
-               put_device(&idxd->wqs[i]->conf_dev);
+       for (; i < idxd->max_wqs; i++) {
+               wq = idxd->wqs[i];
+               put_device(wq_confdev(wq));
+       }
 
-       while (j--)
-               device_unregister(&idxd->wqs[j]->conf_dev);
+       while (j--) {
+               wq = idxd->wqs[j];
+               device_unregister(wq_confdev(wq));
+       }
        return rc;
 }
 
@@ -1686,7 +1375,7 @@ int idxd_register_devices(struct idxd_device *idxd)
        struct device *dev = &idxd->pdev->dev;
        int rc, i;
 
-       rc = device_add(&idxd->conf_dev);
+       rc = device_add(idxd_confdev(idxd));
        if (rc < 0)
                return rc;
 
@@ -1712,12 +1401,12 @@ int idxd_register_devices(struct idxd_device *idxd)
 
  err_group:
        for (i = 0; i < idxd->max_engines; i++)
-               device_unregister(&idxd->engines[i]->conf_dev);
+               device_unregister(engine_confdev(idxd->engines[i]));
  err_engine:
        for (i = 0; i < idxd->max_wqs; i++)
-               device_unregister(&idxd->wqs[i]->conf_dev);
+               device_unregister(wq_confdev(idxd->wqs[i]));
  err_wq:
-       device_del(&idxd->conf_dev);
+       device_del(idxd_confdev(idxd));
        return rc;
 }
 
@@ -1728,19 +1417,19 @@ void idxd_unregister_devices(struct idxd_device *idxd)
        for (i = 0; i < idxd->max_wqs; i++) {
                struct idxd_wq *wq = idxd->wqs[i];
 
-               device_unregister(&wq->conf_dev);
+               device_unregister(wq_confdev(wq));
        }
 
        for (i = 0; i < idxd->max_engines; i++) {
                struct idxd_engine *engine = idxd->engines[i];
 
-               device_unregister(&engine->conf_dev);
+               device_unregister(engine_confdev(engine));
        }
 
        for (i = 0; i < idxd->max_groups; i++) {
                struct idxd_group *group = idxd->groups[i];
 
-               device_unregister(&group->conf_dev);
+               device_unregister(group_confdev(group));
        }
 }
 
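The sysfs.c hunks above mechanically replace open-coded container_of() lookups with the confdev_to_group()/confdev_to_wq()/confdev_to_idxd() helpers and register/unregister through the matching *_confdev() accessors. The helpers themselves live in drivers/dma/idxd/idxd.h and are not part of this excerpt; a minimal sketch of what such accessors look like, assuming they are thin wrappers around the same container_of() (simplified struct, for illustration only):

/*
 * Illustrative stand-ins only; the real definitions are in
 * drivers/dma/idxd/idxd.h and the real struct carries more state.
 */
#include <linux/device.h>
#include <linux/kernel.h>

struct idxd_group {
	struct device conf_dev;		/* configuration device assumed embedded here */
	int id;
	/* ... */
};

/* sysfs callback side: struct device -> owning object */
static inline struct idxd_group *confdev_to_group(struct device *dev)
{
	return container_of(dev, struct idxd_group, conf_dev);
}

/* registration side: owning object -> struct device for device_add() and friends */
static inline struct device *group_confdev(struct idxd_group *group)
{
	return &group->conf_dev;
}

Centralizing the cast in one helper means a later change to where conf_dev is embedded only touches idxd.h, not every show()/store() callback converted above.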
diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c
index df77040..e2b5129 100644
@@ -4319,6 +4319,7 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf,
                            size_t count)
 {
        unsigned long val;
+       int err;
 
        if (!count || count > 11)
                return -EINVAL;
@@ -4327,7 +4328,10 @@ static ssize_t enable_store(struct device_driver *dev, const char *buf,
                return -EFAULT;
 
        /* Write a key */
-       sscanf(buf, "%lx", &val);
+       err = kstrtoul(buf, 16, &val);
+       if (err)
+               return err;
+
        dcr_write(ppc440spe_mq_dcr_host, DCRN_MQ0_XORBA, val);
        isync();
 
@@ -4368,7 +4372,7 @@ static ssize_t poly_store(struct device_driver *dev, const char *buf,
                          size_t count)
 {
        unsigned long reg, val;
-
+       int err;
 #ifdef CONFIG_440SP
        /* 440SP uses default 0x14D polynomial only */
        return -EINVAL;
@@ -4378,7 +4382,9 @@ static ssize_t poly_store(struct device_driver *dev, const char *buf,
                return -EINVAL;
 
        /* e.g., 0x14D or 0x11D */
-       sscanf(buf, "%lx", &val);
+       err = kstrtoul(buf, 16, &val);
+       if (err)
+               return err;
 
        if (val & ~0x1FF)
                return -EINVAL;
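The two hunks above drop sscanf(buf, "%lx", &val), which gives no indication of malformed input, in favour of kstrtoul(), whose -EINVAL/-ERANGE result can be propagated back to the writer. A minimal sketch of the resulting store-handler pattern, using a hypothetical attribute that is not part of this driver:

/*
 * Hypothetical driver attribute illustrating the kstrtoul() pattern the
 * hunks above adopt; bad or overflowing hex input is rejected instead of
 * leaving val unparsed.
 */
#include <linux/device.h>
#include <linux/kernel.h>

static ssize_t example_store(struct device_driver *drv, const char *buf,
			     size_t count)
{
	unsigned long val;
	int err;

	err = kstrtoul(buf, 16, &val);	/* base 16; a single trailing newline is accepted */
	if (err)
		return err;		/* -EINVAL or -ERANGE straight to the caller */

	/* ... act on val ... */
	return count;
}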
diff --git a/drivers/dma/ptdma/Kconfig b/drivers/dma/ptdma/Kconfig
new file mode 100644
index 0000000..b430edd
--- /dev/null
+++ b/drivers/dma/ptdma/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config AMD_PTDMA
+       tristate  "AMD PassThru DMA Engine"
+       depends on X86_64 && PCI
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         Enable support for the AMD PTDMA controller. This controller
+         provides DMA capabilities to perform high bandwidth memory to
+         memory and IO copy operations. It performs DMA transfer through
+         queue-based descriptor management. This DMA controller is intended
+         to be used with AMD Non-Transparent Bridge devices and not for
+         general purpose peripheral DMA.
diff --git a/drivers/dma/ptdma/Makefile b/drivers/dma/ptdma/Makefile
new file mode 100644
index 0000000..ce54102
--- /dev/null
+++ b/drivers/dma/ptdma/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# AMD Passthru DMA driver
+#
+
+obj-$(CONFIG_AMD_PTDMA) += ptdma.o
+
+ptdma-objs := ptdma-dev.o ptdma-dmaengine.o ptdma-debugfs.o
+
+ptdma-$(CONFIG_PCI) += ptdma-pci.o
diff --git a/drivers/dma/ptdma/ptdma-debugfs.c b/drivers/dma/ptdma/ptdma-debugfs.c
new file mode 100644
index 0000000..c8307d3
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-debugfs.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+
+#include "ptdma.h"
+
+/* DebugFS helpers */
+#define        RI_VERSION_NUM  0x0000003F
+
+#define        RI_NUM_VQM      0x00078000
+#define        RI_NVQM_SHIFT   15
+
+static int pt_debugfs_info_show(struct seq_file *s, void *p)
+{
+       struct pt_device *pt = s->private;
+       unsigned int regval;
+
+       seq_printf(s, "Device name: %s\n", dev_name(pt->dev));
+       seq_printf(s, "   # Queues: %d\n", 1);
+       seq_printf(s, "     # Cmds: %d\n", pt->cmd_count);
+
+       regval = ioread32(pt->io_regs + CMD_PT_VERSION);
+
+       seq_printf(s, "    Version: %d\n", regval & RI_VERSION_NUM);
+       seq_puts(s, "    Engines:");
+       seq_puts(s, "\n");
+       seq_printf(s, "     Queues: %d\n", (regval & RI_NUM_VQM) >> RI_NVQM_SHIFT);
+
+       return 0;
+}
+
+/*
+ * Return a formatted buffer containing the current
+ * statistics of queue for PTDMA
+ */
+static int pt_debugfs_stats_show(struct seq_file *s, void *p)
+{
+       struct pt_device *pt = s->private;
+
+       seq_printf(s, "Total Interrupts Handled: %ld\n", pt->total_interrupts);
+
+       return 0;
+}
+
+static int pt_debugfs_queue_show(struct seq_file *s, void *p)
+{
+       struct pt_cmd_queue *cmd_q = s->private;
+       unsigned int regval;
+
+       if (!cmd_q)
+               return 0;
+
+       seq_printf(s, "               Pass-Thru: %ld\n", cmd_q->total_pt_ops);
+
+       regval = ioread32(cmd_q->reg_control + 0x000C);
+
+       seq_puts(s, "      Enabled Interrupts:");
+       if (regval & INT_EMPTY_QUEUE)
+               seq_puts(s, " EMPTY");
+       if (regval & INT_QUEUE_STOPPED)
+               seq_puts(s, " STOPPED");
+       if (regval & INT_ERROR)
+               seq_puts(s, " ERROR");
+       if (regval & INT_COMPLETION)
+               seq_puts(s, " COMPLETION");
+       seq_puts(s, "\n");
+
+       return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_info);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_queue);
+DEFINE_SHOW_ATTRIBUTE(pt_debugfs_stats);
+
+void ptdma_debugfs_setup(struct pt_device *pt)
+{
+       struct pt_cmd_queue *cmd_q;
+       struct dentry *debugfs_q_instance;
+
+       if (!debugfs_initialized())
+               return;
+
+       debugfs_create_file("info", 0400, pt->dma_dev.dbg_dev_root, pt,
+                           &pt_debugfs_info_fops);
+
+       debugfs_create_file("stats", 0400, pt->dma_dev.dbg_dev_root, pt,
+                           &pt_debugfs_stats_fops);
+
+       cmd_q = &pt->cmd_q;
+
+       debugfs_q_instance =
+               debugfs_create_dir("q", pt->dma_dev.dbg_dev_root);
+
+       debugfs_create_file("stats", 0400, debugfs_q_instance, cmd_q,
+                           &pt_debugfs_queue_fops);
+}
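The DEFINE_SHOW_ATTRIBUTE() invocations above come from <linux/seq_file.h>; each generates a single_open() wrapper plus a read-only struct file_operations named <name>_fops. That is also how the pt and cmd_q pointers handed to debugfs_create_file() end up as s->private in the show functions: debugfs stores the data argument in inode->i_private, and single_open() forwards it as the seq_file private pointer. Approximate expansion for the info file, shown for orientation only:

/* Roughly what DEFINE_SHOW_ATTRIBUTE(pt_debugfs_info) expands to. */
static int pt_debugfs_info_open(struct inode *inode, struct file *file)
{
	return single_open(file, pt_debugfs_info_show, inode->i_private);
}

static const struct file_operations pt_debugfs_info_fops = {
	.owner		= THIS_MODULE,
	.open		= pt_debugfs_info_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};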
diff --git a/drivers/dma/ptdma/ptdma-dev.c b/drivers/dma/ptdma/ptdma-dev.c
new file mode 100644
index 0000000..8a6bf29
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dev.c
@@ -0,0 +1,305 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/debugfs.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "ptdma.h"
+
+/* Human-readable error strings */
+static char *pt_error_codes[] = {
+       "",
+       "ERR 01: ILLEGAL_ENGINE",
+       "ERR 03: ILLEGAL_FUNCTION_TYPE",
+       "ERR 04: ILLEGAL_FUNCTION_MODE",
+       "ERR 06: ILLEGAL_FUNCTION_SIZE",
+       "ERR 08: ILLEGAL_FUNCTION_RSVD",
+       "ERR 09: ILLEGAL_BUFFER_LENGTH",
+       "ERR 10: VLSB_FAULT",
+       "ERR 11: ILLEGAL_MEM_ADDR",
+       "ERR 12: ILLEGAL_MEM_SEL",
+       "ERR 13: ILLEGAL_CONTEXT_ID",
+       "ERR 15: 0xF Reserved",
+       "ERR 18: CMD_TIMEOUT",
+       "ERR 19: IDMA0_AXI_SLVERR",
+       "ERR 20: IDMA0_AXI_DECERR",
+       "ERR 21: 0x15 Reserved",
+       "ERR 22: IDMA1_AXI_SLAVE_FAULT",
+       "ERR 23: IDMA1_AIXI_DECERR",
+       "ERR 24: 0x18 Reserved",
+       "ERR 27: 0x1B Reserved",
+       "ERR 38: ODMA0_AXI_SLVERR",
+       "ERR 39: ODMA0_AXI_DECERR",
+       "ERR 40: 0x28 Reserved",
+       "ERR 41: ODMA1_AXI_SLVERR",
+       "ERR 42: ODMA1_AXI_DECERR",
+       "ERR 43: LSB_PARITY_ERR",
+};
+
+static void pt_log_error(struct pt_device *d, int e)
+{
+       dev_err(d->dev, "PTDMA error: %s (0x%x)\n", pt_error_codes[e], e);
+}
+
+void pt_start_queue(struct pt_cmd_queue *cmd_q)
+{
+       /* Turn on the run bit */
+       iowrite32(cmd_q->qcontrol | CMD_Q_RUN, cmd_q->reg_control);
+}
+
+void pt_stop_queue(struct pt_cmd_queue *cmd_q)
+{
+       /* Turn off the run bit */
+       iowrite32(cmd_q->qcontrol & ~CMD_Q_RUN, cmd_q->reg_control);
+}
+
+static int pt_core_execute_cmd(struct ptdma_desc *desc, struct pt_cmd_queue *cmd_q)
+{
+       bool soc = FIELD_GET(DWORD0_SOC, desc->dw0);
+       u8 *q_desc = (u8 *)&cmd_q->qbase[cmd_q->qidx];
+       u32 tail;
+
+       if (soc) {
+               desc->dw0 |= FIELD_PREP(DWORD0_IOC, desc->dw0);
+               desc->dw0 &= ~DWORD0_SOC;
+       }
+       mutex_lock(&cmd_q->q_mutex);
+
+       /* Copy 32-byte command descriptor to hw queue. */
+       memcpy(q_desc, desc, 32);
+       cmd_q->qidx = (cmd_q->qidx + 1) % CMD_Q_LEN;
+
+       /* The data used by this command must be flushed to memory */
+       wmb();
+
+       /* Write the new tail address back to the queue register */
+       tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+       iowrite32(tail, cmd_q->reg_control + 0x0004);
+
+       /* Turn the queue back on using our cached control register */
+       pt_start_queue(cmd_q);
+       mutex_unlock(&cmd_q->q_mutex);
+
+       return 0;
+}
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+                            struct pt_passthru_engine *pt_engine)
+{
+       struct ptdma_desc desc;
+
+       cmd_q->cmd_error = 0;
+       cmd_q->total_pt_ops++;
+       memset(&desc, 0, sizeof(desc));
+       desc.dw0 = CMD_DESC_DW0_VAL;
+       desc.length = pt_engine->src_len;
+       desc.src_lo = lower_32_bits(pt_engine->src_dma);
+       desc.dw3.src_hi = upper_32_bits(pt_engine->src_dma);
+       desc.dst_lo = lower_32_bits(pt_engine->dst_dma);
+       desc.dw5.dst_hi = upper_32_bits(pt_engine->dst_dma);
+
+       return pt_core_execute_cmd(&desc, cmd_q);
+}
+
+static inline void pt_core_disable_queue_interrupts(struct pt_device *pt)
+{
+       iowrite32(0, pt->cmd_q.reg_control + 0x000C);
+}
+
+static inline void pt_core_enable_queue_interrupts(struct pt_device *pt)
+{
+       iowrite32(SUPPORTED_INTERRUPTS, pt->cmd_q.reg_control + 0x000C);
+}
+
+static void pt_do_cmd_complete(unsigned long data)
+{
+       struct pt_tasklet_data *tdata = (struct pt_tasklet_data *)data;
+       struct pt_cmd *cmd = tdata->cmd;
+       struct pt_cmd_queue *cmd_q = &cmd->pt->cmd_q;
+       u32 tail;
+
+       if (cmd_q->cmd_error) {
+              /*
+               * Log the error and flush the queue by
+               * moving the head pointer
+               */
+               tail = lower_32_bits(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
+               pt_log_error(cmd_q->pt, cmd_q->cmd_error);
+               iowrite32(tail, cmd_q->reg_control + 0x0008);
+       }
+
+       cmd->pt_cmd_callback(cmd->data, cmd->ret);
+}
+
+static irqreturn_t pt_core_irq_handler(int irq, void *data)
+{
+       struct pt_device *pt = data;
+       struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+       u32 status;
+
+       pt_core_disable_queue_interrupts(pt);
+       pt->total_interrupts++;
+       status = ioread32(cmd_q->reg_control + 0x0010);
+       if (status) {
+               cmd_q->int_status = status;
+               cmd_q->q_status = ioread32(cmd_q->reg_control + 0x0100);
+               cmd_q->q_int_status = ioread32(cmd_q->reg_control + 0x0104);
+
+               /* On error, only save the first error value */
+               if ((status & INT_ERROR) && !cmd_q->cmd_error)
+                       cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
+
+               /* Acknowledge the interrupt */
+               iowrite32(status, cmd_q->reg_control + 0x0010);
+               pt_core_enable_queue_interrupts(pt);
+               pt_do_cmd_complete((ulong)&pt->tdata);
+       }
+       return IRQ_HANDLED;
+}
+
+int pt_core_init(struct pt_device *pt)
+{
+       char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
+       struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+       u32 dma_addr_lo, dma_addr_hi;
+       struct device *dev = pt->dev;
+       struct dma_pool *dma_pool;
+       int ret;
+
+       /* Allocate a dma pool for the queue */
+       snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q", dev_name(pt->dev));
+
+       dma_pool = dma_pool_create(dma_pool_name, dev,
+                                  PT_DMAPOOL_MAX_SIZE,
+                                  PT_DMAPOOL_ALIGN, 0);
+       if (!dma_pool)
+               return -ENOMEM;
+
+       /* ptdma core initialisation */
+       iowrite32(CMD_CONFIG_VHB_EN, pt->io_regs + CMD_CONFIG_OFFSET);
+       iowrite32(CMD_QUEUE_PRIO, pt->io_regs + CMD_QUEUE_PRIO_OFFSET);
+       iowrite32(CMD_TIMEOUT_DISABLE, pt->io_regs + CMD_TIMEOUT_OFFSET);
+       iowrite32(CMD_CLK_GATE_CONFIG, pt->io_regs + CMD_CLK_GATE_CTL_OFFSET);
+       iowrite32(CMD_CONFIG_REQID, pt->io_regs + CMD_REQID_CONFIG_OFFSET);
+
+       cmd_q->pt = pt;
+       cmd_q->dma_pool = dma_pool;
+       mutex_init(&cmd_q->q_mutex);
+
+       /* Page alignment satisfies our needs for N <= 128 */
+       cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
+       cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize,
+                                         &cmd_q->qbase_dma,
+                                         GFP_KERNEL);
+       if (!cmd_q->qbase) {
+               dev_err(dev, "unable to allocate command queue\n");
+               ret = -ENOMEM;
+               goto e_dma_alloc;
+       }
+
+       cmd_q->qidx = 0;
+
+       /* Preset some register values */
+       cmd_q->reg_control = pt->io_regs + CMD_Q_STATUS_INCR;
+
+       /* Turn off the queues and disable interrupts until ready */
+       pt_core_disable_queue_interrupts(pt);
+
+       cmd_q->qcontrol = 0; /* Start with nothing */
+       iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+       ioread32(cmd_q->reg_control + 0x0104);
+       ioread32(cmd_q->reg_control + 0x0100);
+
+       /* Clear the interrupt status */
+       iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+
+       /* Request an irq */
+       ret = request_irq(pt->pt_irq, pt_core_irq_handler, 0, dev_name(pt->dev), pt);
+       if (ret)
+               goto e_pool;
+
+       /* Update the device registers with queue information. */
+       cmd_q->qcontrol &= ~CMD_Q_SIZE;
+       cmd_q->qcontrol |= FIELD_PREP(CMD_Q_SIZE, QUEUE_SIZE_VAL);
+
+       cmd_q->qdma_tail = cmd_q->qbase_dma;
+       dma_addr_lo = lower_32_bits(cmd_q->qdma_tail);
+       iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0004);
+       iowrite32((u32)dma_addr_lo, cmd_q->reg_control + 0x0008);
+
+       dma_addr_hi = upper_32_bits(cmd_q->qdma_tail);
+       cmd_q->qcontrol |= (dma_addr_hi << 16);
+       iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
+
+       pt_core_enable_queue_interrupts(pt);
+
+       /* Register the DMA engine support */
+       ret = pt_dmaengine_register(pt);
+       if (ret)
+               goto e_dmaengine;
+
+       /* Set up debugfs entries */
+       ptdma_debugfs_setup(pt);
+
+       return 0;
+
+e_dmaengine:
+       free_irq(pt->pt_irq, pt);
+
+e_dma_alloc:
+       dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase, cmd_q->qbase_dma);
+
+e_pool:
+       dev_err(dev, "unable to allocate an IRQ\n");
+       dma_pool_destroy(pt->cmd_q.dma_pool);
+
+       return ret;
+}
+
+void pt_core_destroy(struct pt_device *pt)
+{
+       struct device *dev = pt->dev;
+       struct pt_cmd_queue *cmd_q = &pt->cmd_q;
+       struct pt_cmd *cmd;
+
+       /* Unregister the DMA engine */
+       pt_dmaengine_unregister(pt);
+
+       /* Disable and clear interrupts */
+       pt_core_disable_queue_interrupts(pt);
+
+       /* Turn off the run bit */
+       pt_stop_queue(cmd_q);
+
+       /* Clear the interrupt status */
+       iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);
+       ioread32(cmd_q->reg_control + 0x0104);
+       ioread32(cmd_q->reg_control + 0x0100);
+
+       free_irq(pt->pt_irq, pt);
+
+       dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
+                         cmd_q->qbase_dma);
+
+       /* Flush the cmd queue */
+       while (!list_empty(&pt->cmd)) {
+               /* Invoke the callback directly with an error code */
+               cmd = list_first_entry(&pt->cmd, struct pt_cmd, entry);
+               list_del(&cmd->entry);
+               cmd->pt_cmd_callback(cmd->data, -ENODEV);
+       }
+}
diff --git a/drivers/dma/ptdma/ptdma-dmaengine.c b/drivers/dma/ptdma/ptdma-dmaengine.c
new file mode 100644
index 0000000..c9e52f6
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-dmaengine.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthrough DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include "ptdma.h"
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
+{
+       return container_of(dma_chan, struct pt_dma_chan, vc.chan);
+}
+
+static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd)
+{
+       return container_of(vd, struct pt_dma_desc, vd);
+}
+
+static void pt_free_chan_resources(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+       vchan_free_chan_resources(&chan->vc);
+}
+
+static void pt_synchronize(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+
+       vchan_synchronize(&chan->vc);
+}
+
+static void pt_do_cleanup(struct virt_dma_desc *vd)
+{
+       struct pt_dma_desc *desc = to_pt_desc(vd);
+       struct pt_device *pt = desc->pt;
+
+       kmem_cache_free(pt->dma_desc_cache, desc);
+}
+
+static int pt_dma_start_desc(struct pt_dma_desc *desc)
+{
+       struct pt_passthru_engine *pt_engine;
+       struct pt_device *pt;
+       struct pt_cmd *pt_cmd;
+       struct pt_cmd_queue *cmd_q;
+
+       desc->issued_to_hw = 1;
+
+       pt_cmd = &desc->pt_cmd;
+       pt = pt_cmd->pt;
+       cmd_q = &pt->cmd_q;
+       pt_engine = &pt_cmd->passthru;
+
+       pt->tdata.cmd = pt_cmd;
+
+       /* Execute the command */
+       pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
+
+       return 0;
+}
+
+static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan)
+{
+       /* Get the next DMA descriptor on the active list */
+       struct virt_dma_desc *vd = vchan_next_desc(&chan->vc);
+
+       return vd ? to_pt_desc(vd) : NULL;
+}
+
+static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
+                                                struct pt_dma_desc *desc)
+{
+       struct dma_async_tx_descriptor *tx_desc;
+       struct virt_dma_desc *vd;
+       unsigned long flags;
+
+       /* Loop over descriptors until one is found with commands */
+       do {
+               if (desc) {
+                       if (!desc->issued_to_hw) {
+                               /* No errors, keep going */
+                               if (desc->status != DMA_ERROR)
+                                       return desc;
+                       }
+
+                       tx_desc = &desc->vd.tx;
+                       vd = &desc->vd;
+               } else {
+                       tx_desc = NULL;
+               }
+
+               spin_lock_irqsave(&chan->vc.lock, flags);
+
+               if (desc) {
+                       if (desc->status != DMA_ERROR)
+                               desc->status = DMA_COMPLETE;
+
+                       dma_cookie_complete(tx_desc);
+                       dma_descriptor_unmap(tx_desc);
+                       list_del(&desc->vd.node);
+               }
+
+               desc = pt_next_dma_desc(chan);
+
+               spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+               if (tx_desc) {
+                       dmaengine_desc_get_callback_invoke(tx_desc, NULL);
+                       dma_run_dependencies(tx_desc);
+                       vchan_vdesc_fini(vd);
+               }
+       } while (desc);
+
+       return NULL;
+}
+
+static void pt_cmd_callback(void *data, int err)
+{
+       struct pt_dma_desc *desc = data;
+       struct dma_chan *dma_chan;
+       struct pt_dma_chan *chan;
+       int ret;
+
+       if (err == -EINPROGRESS)
+               return;
+
+       dma_chan = desc->vd.tx.chan;
+       chan = to_pt_chan(dma_chan);
+
+       if (err)
+               desc->status = DMA_ERROR;
+
+       while (true) {
+               /* Check for DMA descriptor completion */
+               desc = pt_handle_active_desc(chan, desc);
+
+               /* Don't submit cmd if no descriptor or DMA is paused */
+               if (!desc)
+                       break;
+
+               ret = pt_dma_start_desc(desc);
+               if (!ret)
+                       break;
+
+               desc->status = DMA_ERROR;
+       }
+}
+
+static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
+                                            unsigned long flags)
+{
+       struct pt_dma_desc *desc;
+
+       desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+       desc->pt = chan->pt;
+       desc->issued_to_hw = 0;
+       desc->status = DMA_IN_PROGRESS;
+
+       return desc;
+}
+
+static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
+                                         dma_addr_t dst,
+                                         dma_addr_t src,
+                                         unsigned int len,
+                                         unsigned long flags)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       struct pt_passthru_engine *pt_engine;
+       struct pt_dma_desc *desc;
+       struct pt_cmd *pt_cmd;
+
+       desc = pt_alloc_dma_desc(chan, flags);
+       if (!desc)
+               return NULL;
+
+       pt_cmd = &desc->pt_cmd;
+       pt_cmd->pt = chan->pt;
+       pt_engine = &pt_cmd->passthru;
+       pt_cmd->engine = PT_ENGINE_PASSTHRU;
+       pt_engine->src_dma = src;
+       pt_engine->dst_dma = dst;
+       pt_engine->src_len = len;
+       pt_cmd->pt_cmd_callback = pt_cmd_callback;
+       pt_cmd->data = desc;
+
+       desc->len = len;
+
+       return desc;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
+                  dma_addr_t src, size_t len, unsigned long flags)
+{
+       struct pt_dma_desc *desc;
+
+       desc = pt_create_desc(dma_chan, dst, src, len, flags);
+       if (!desc)
+               return NULL;
+
+       return &desc->vd.tx;
+}
+
+static struct dma_async_tx_descriptor *
+pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       struct pt_dma_desc *desc;
+
+       desc = pt_alloc_dma_desc(chan, flags);
+       if (!desc)
+               return NULL;
+
+       return &desc->vd.tx;
+}
+
+static void pt_issue_pending(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       struct pt_dma_desc *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
+       vchan_issue_pending(&chan->vc);
+
+       desc = pt_next_dma_desc(chan);
+
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       /* If there was nothing active, start processing */
+       if (desc)
+               pt_cmd_callback(desc, 0);
+}
+
+static int pt_pause(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       pt_stop_queue(&chan->pt->cmd_q);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       return 0;
+}
+
+static int pt_resume(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       struct pt_dma_desc *desc = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       pt_start_queue(&chan->pt->cmd_q);
+       desc = pt_next_dma_desc(chan);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       /* If there was something active, re-start */
+       if (desc)
+               pt_cmd_callback(desc, 0);
+
+       return 0;
+}
+
+static int pt_terminate_all(struct dma_chan *dma_chan)
+{
+       struct pt_dma_chan *chan = to_pt_chan(dma_chan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+       vchan_get_all_descriptors(&chan->vc, &head);
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+
+       vchan_dma_desc_free_list(&chan->vc, &head);
+       vchan_free_chan_resources(&chan->vc);
+
+       return 0;
+}
+
+int pt_dmaengine_register(struct pt_device *pt)
+{
+       struct pt_dma_chan *chan;
+       struct dma_device *dma_dev = &pt->dma_dev;
+       char *cmd_cache_name;
+       char *desc_cache_name;
+       int ret;
+
+       pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
+                                      GFP_KERNEL);
+       if (!pt->pt_dma_chan)
+               return -ENOMEM;
+
+       cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+                                       "%s-dmaengine-cmd-cache",
+                                       dev_name(pt->dev));
+       if (!cmd_cache_name)
+               return -ENOMEM;
+
+       desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
+                                        "%s-dmaengine-desc-cache",
+                                        dev_name(pt->dev));
+       if (!desc_cache_name) {
+               ret = -ENOMEM;
+               goto err_cache;
+       }
+
+       pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
+                                              sizeof(struct pt_dma_desc), 0,
+                                              SLAB_HWCACHE_ALIGN, NULL);
+       if (!pt->dma_desc_cache) {
+               ret = -ENOMEM;
+               goto err_cache;
+       }
+
+       dma_dev->dev = pt->dev;
+       dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+       dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
+       dma_dev->directions = DMA_MEM_TO_MEM;
+       dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+       dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
+       dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);
+
+       /*
+        * PTDMA is intended to be used with the AMD NTB devices, hence
+        * marking it as DMA_PRIVATE.
+        */
+       dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);
+
+       INIT_LIST_HEAD(&dma_dev->channels);
+
+       chan = pt->pt_dma_chan;
+       chan->pt = pt;
+
+       /* Set base and prep routines */
+       dma_dev->device_free_chan_resources = pt_free_chan_resources;
+       dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
+       dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt;
+       dma_dev->device_issue_pending = pt_issue_pending;
+       dma_dev->device_tx_status = dma_cookie_status;
+       dma_dev->device_pause = pt_pause;
+       dma_dev->device_resume = pt_resume;
+       dma_dev->device_terminate_all = pt_terminate_all;
+       dma_dev->device_synchronize = pt_synchronize;
+
+       chan->vc.desc_free = pt_do_cleanup;
+       vchan_init(&chan->vc, dma_dev);
+
+       dma_set_mask_and_coherent(pt->dev, DMA_BIT_MASK(64));
+
+       ret = dma_async_device_register(dma_dev);
+       if (ret)
+               goto err_reg;
+
+       return 0;
+
+err_reg:
+       kmem_cache_destroy(pt->dma_desc_cache);
+
+err_cache:
+       kmem_cache_destroy(pt->dma_cmd_cache);
+
+       return ret;
+}
+
+void pt_dmaengine_unregister(struct pt_device *pt)
+{
+       struct dma_device *dma_dev = &pt->dma_dev;
+
+       dma_async_device_unregister(dma_dev);
+
+       kmem_cache_destroy(pt->dma_desc_cache);
+       kmem_cache_destroy(pt->dma_cmd_cache);
+}
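pt_dmaengine_register() above publishes a single DMA_PRIVATE memcpy channel, so a consumer (an AMD NTB client in the intended use case) has to request it explicitly instead of going through the public channel pool. A hypothetical client sketch built only from standard dmaengine APIs; the synchronous wait and the missing filter function are simplifications, not code from this series:

/*
 * Hypothetical dmaengine client for a private memcpy channel such as the
 * one registered above.  dst/src must already be DMA-mapped; a real
 * client would also pass a filter function to pick the ptdma channel
 * specifically rather than taking the first DMA_MEMCPY-capable one.
 */
#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/dmaengine.h>

static void example_memcpy_done(void *arg)
{
	complete(arg);		/* runs from the driver's completion callback */
}

static int example_memcpy(dma_addr_t dst, dma_addr_t src, size_t len)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct dma_async_tx_descriptor *tx;
	struct dma_chan *chan;
	dma_cap_mask_t mask;
	dma_cookie_t cookie;
	int ret = 0;

	dma_cap_zero(mask);
	dma_cap_set(DMA_MEMCPY, mask);

	/* DMA_PRIVATE channels are only handed out on explicit request */
	chan = dma_request_channel(mask, NULL, NULL);
	if (!chan)
		return -ENODEV;

	tx = dmaengine_prep_dma_memcpy(chan, dst, src, len, DMA_PREP_INTERRUPT);
	if (!tx) {
		ret = -EIO;
		goto out;
	}

	tx->callback = example_memcpy_done;
	tx->callback_param = &done;

	cookie = dmaengine_submit(tx);
	ret = dma_submit_error(cookie);
	if (ret)
		goto out;

	dma_async_issue_pending(chan);
	wait_for_completion(&done);
out:
	dma_release_channel(chan);
	return ret;
}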
diff --git a/drivers/dma/ptdma/ptdma-pci.c b/drivers/dma/ptdma/ptdma-pci.c
new file mode 100644
index 0000000..22739ff
--- /dev/null
+++ b/drivers/dma/ptdma/ptdma-pci.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/pci_ids.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+
+#include "ptdma.h"
+
+struct pt_msix {
+       int msix_count;
+       struct msix_entry msix_entry;
+};
+
+/*
+ * pt_alloc_struct - allocate and initialize the pt_device struct
+ *
+ * @dev: device struct of the PTDMA
+ */
+static struct pt_device *pt_alloc_struct(struct device *dev)
+{
+       struct pt_device *pt;
+
+       pt = devm_kzalloc(dev, sizeof(*pt), GFP_KERNEL);
+
+       if (!pt)
+               return NULL;
+       pt->dev = dev;
+
+       INIT_LIST_HEAD(&pt->cmd);
+
+       return pt;
+}
+
+static int pt_get_msix_irqs(struct pt_device *pt)
+{
+       struct pt_msix *pt_msix = pt->pt_msix;
+       struct device *dev = pt->dev;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       int ret;
+
+       pt_msix->msix_entry.entry = 0;
+
+       ret = pci_enable_msix_range(pdev, &pt_msix->msix_entry, 1, 1);
+       if (ret < 0)
+               return ret;
+
+       pt_msix->msix_count = ret;
+
+       pt->pt_irq = pt_msix->msix_entry.vector;
+
+       return 0;
+}
+
+static int pt_get_msi_irq(struct pt_device *pt)
+{
+       struct device *dev = pt->dev;
+       struct pci_dev *pdev = to_pci_dev(dev);
+       int ret;
+
+       ret = pci_enable_msi(pdev);
+       if (ret)
+               return ret;
+
+       pt->pt_irq = pdev->irq;
+
+       return 0;
+}
+
+static int pt_get_irqs(struct pt_device *pt)
+{
+       struct device *dev = pt->dev;
+       int ret;
+
+       ret = pt_get_msix_irqs(pt);
+       if (!ret)
+               return 0;
+
+       /* Couldn't get MSI-X vectors, try MSI */
+       dev_err(dev, "could not enable MSI-X (%d), trying MSI\n", ret);
+       ret = pt_get_msi_irq(pt);
+       if (!ret)
+               return 0;
+
+       /* Couldn't get MSI interrupt */
+       dev_err(dev, "could not enable MSI (%d)\n", ret);
+
+       return ret;
+}
+
+static void pt_free_irqs(struct pt_device *pt)
+{
+       struct pt_msix *pt_msix = pt->pt_msix;
+       struct device *dev = pt->dev;
+       struct pci_dev *pdev = to_pci_dev(dev);
+
+       if (pt_msix->msix_count)
+               pci_disable_msix(pdev);
+       else if (pt->pt_irq)
+               pci_disable_msi(pdev);
+
+       pt->pt_irq = 0;
+}
+
+static int pt_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct pt_device *pt;
+       struct pt_msix *pt_msix;
+       struct device *dev = &pdev->dev;
+       void __iomem * const *iomap_table;
+       int bar_mask;
+       int ret = -ENOMEM;
+
+       pt = pt_alloc_struct(dev);
+       if (!pt)
+               goto e_err;
+
+       pt_msix = devm_kzalloc(dev, sizeof(*pt_msix), GFP_KERNEL);
+       if (!pt_msix)
+               goto e_err;
+
+       pt->pt_msix = pt_msix;
+       pt->dev_vdata = (struct pt_dev_vdata *)id->driver_data;
+       if (!pt->dev_vdata) {
+               ret = -ENODEV;
+               dev_err(dev, "missing driver data\n");
+               goto e_err;
+       }
+
+       ret = pcim_enable_device(pdev);
+       if (ret) {
+               dev_err(dev, "pcim_enable_device failed (%d)\n", ret);
+               goto e_err;
+       }
+
+       bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+       ret = pcim_iomap_regions(pdev, bar_mask, "ptdma");
+       if (ret) {
+               dev_err(dev, "pcim_iomap_regions failed (%d)\n", ret);
+               goto e_err;
+       }
+
+       iomap_table = pcim_iomap_table(pdev);
+       if (!iomap_table) {
+               dev_err(dev, "pcim_iomap_table failed\n");
+               ret = -ENOMEM;
+               goto e_err;
+       }
+
+       pt->io_regs = iomap_table[pt->dev_vdata->bar];
+       if (!pt->io_regs) {
+               dev_err(dev, "ioremap failed\n");
+               ret = -ENOMEM;
+               goto e_err;
+       }
+
+       ret = pt_get_irqs(pt);
+       if (ret)
+               goto e_err;
+
+       pci_set_master(pdev);
+
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(48));
+       if (ret) {
+               ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+               if (ret) {
+                       dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
+                               ret);
+                       goto e_err;
+               }
+       }
+
+       dev_set_drvdata(dev, pt);
+
+       if (pt->dev_vdata)
+               ret = pt_core_init(pt);
+
+       if (ret)
+               goto e_err;
+
+       return 0;
+
+e_err:
+       dev_err(dev, "initialization failed ret = %d\n", ret);
+
+       return ret;
+}
+
+static void pt_pci_remove(struct pci_dev *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct pt_device *pt = dev_get_drvdata(dev);
+
+       if (!pt)
+               return;
+
+       if (pt->dev_vdata)
+               pt_core_destroy(pt);
+
+       pt_free_irqs(pt);
+}
+
+static const struct pt_dev_vdata dev_vdata[] = {
+       {
+               .bar = 2,
+       },
+};
+
+static const struct pci_device_id pt_pci_table[] = {
+       { PCI_VDEVICE(AMD, 0x1498), (kernel_ulong_t)&dev_vdata[0] },
+       /* Last entry must be zero */
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, pt_pci_table);
+
+static struct pci_driver pt_pci_driver = {
+       .name = "ptdma",
+       .id_table = pt_pci_table,
+       .probe = pt_pci_probe,
+       .remove = pt_pci_remove,
+};
+
+module_pci_driver(pt_pci_driver);
+
+MODULE_AUTHOR("Sanjay R Mehta <sanju.mehta@amd.com>");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("AMD PassThru DMA driver");
diff --git a/drivers/dma/ptdma/ptdma.h b/drivers/dma/ptdma/ptdma.h
new file mode 100644 (file)
index 0000000..afbf192
--- /dev/null
@@ -0,0 +1,324 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * AMD Passthru DMA device driver
+ * -- Based on the CCP driver
+ *
+ * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
+ *
+ * Author: Sanjay R Mehta <sanju.mehta@amd.com>
+ * Author: Tom Lendacky <thomas.lendacky@amd.com>
+ * Author: Gary R Hook <gary.hook@amd.com>
+ */
+
+#ifndef __PT_DEV_H__
+#define __PT_DEV_H__
+
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/pci.h>
+#include <linux/spinlock.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/wait.h>
+#include <linux/dmapool.h>
+
+#include "../virt-dma.h"
+
+#define MAX_PT_NAME_LEN                        16
+#define MAX_DMAPOOL_NAME_LEN           32
+
+#define MAX_HW_QUEUES                  1
+#define MAX_CMD_QLEN                   100
+
+#define PT_ENGINE_PASSTHRU             5
+
+/* Register Mappings */
+#define IRQ_MASK_REG                   0x040
+#define IRQ_STATUS_REG                 0x200
+
+#define CMD_Q_ERROR(__qs)              ((__qs) & 0x0000003f)
+
+#define CMD_QUEUE_PRIO_OFFSET          0x00
+#define CMD_REQID_CONFIG_OFFSET                0x04
+#define CMD_TIMEOUT_OFFSET             0x08
+#define CMD_PT_VERSION                 0x10
+
+#define CMD_Q_CONTROL_BASE             0x0000
+#define CMD_Q_TAIL_LO_BASE             0x0004
+#define CMD_Q_HEAD_LO_BASE             0x0008
+#define CMD_Q_INT_ENABLE_BASE          0x000C
+#define CMD_Q_INTERRUPT_STATUS_BASE    0x0010
+
+#define CMD_Q_STATUS_BASE              0x0100
+#define CMD_Q_INT_STATUS_BASE          0x0104
+#define CMD_Q_DMA_STATUS_BASE          0x0108
+#define CMD_Q_DMA_READ_STATUS_BASE     0x010C
+#define CMD_Q_DMA_WRITE_STATUS_BASE    0x0110
+#define CMD_Q_ABORT_BASE               0x0114
+#define CMD_Q_AX_CACHE_BASE            0x0118
+
+#define CMD_CONFIG_OFFSET              0x1120
+#define CMD_CLK_GATE_CTL_OFFSET                0x6004
+
+#define CMD_DESC_DW0_VAL               0x500012
+
+/* Address offset for virtual queue registers */
+#define CMD_Q_STATUS_INCR              0x1000
+
+/* Bit masks */
+#define CMD_CONFIG_REQID               0
+#define CMD_TIMEOUT_DISABLE            0
+#define CMD_CLK_DYN_GATING_DIS         0
+#define CMD_CLK_SW_GATE_MODE           0
+#define CMD_CLK_GATE_CTL               0
+#define CMD_QUEUE_PRIO                 GENMASK(2, 1)
+#define CMD_CONFIG_VHB_EN              BIT(0)
+#define CMD_CLK_DYN_GATING_EN          BIT(0)
+#define CMD_CLK_HW_GATE_MODE           BIT(0)
+#define CMD_CLK_GATE_ON_DELAY          BIT(12)
+#define CMD_CLK_GATE_OFF_DELAY         BIT(12)
+
+#define CMD_CLK_GATE_CONFIG            (CMD_CLK_GATE_CTL | \
+                                       CMD_CLK_HW_GATE_MODE | \
+                                       CMD_CLK_GATE_ON_DELAY | \
+                                       CMD_CLK_DYN_GATING_EN | \
+                                       CMD_CLK_GATE_OFF_DELAY)
+
+#define CMD_Q_LEN                      32
+#define CMD_Q_RUN                      BIT(0)
+#define CMD_Q_HALT                     BIT(1)
+#define CMD_Q_MEM_LOCATION             BIT(2)
+#define CMD_Q_SIZE_MASK                        GENMASK(4, 0)
+#define CMD_Q_SIZE                     GENMASK(7, 3)
+#define CMD_Q_SHIFT                    GENMASK(1, 0)
+#define QUEUE_SIZE_VAL                 ((ffs(CMD_Q_LEN) - 2) & \
+                                                                 CMD_Q_SIZE_MASK)
+#define Q_PTR_MASK                     (2 << (QUEUE_SIZE_VAL + 5) - 1)
+#define Q_DESC_SIZE                    sizeof(struct ptdma_desc)
+#define Q_SIZE(n)                      (CMD_Q_LEN * (n))
+
+#define INT_COMPLETION                 BIT(0)
+#define INT_ERROR                      BIT(1)
+#define INT_QUEUE_STOPPED              BIT(2)
+#define INT_EMPTY_QUEUE                        BIT(3)
+#define SUPPORTED_INTERRUPTS           (INT_COMPLETION | INT_ERROR)
+
+/****** Local Storage Block ******/
+#define LSB_START                      0
+#define LSB_END                                127
+#define LSB_COUNT                      (LSB_END - LSB_START + 1)
+
+#define PT_DMAPOOL_MAX_SIZE            64
+#define PT_DMAPOOL_ALIGN               BIT(5)
+
+#define PT_PASSTHRU_BLOCKSIZE          512
+
+struct pt_device;
+
+struct pt_tasklet_data {
+       struct completion completion;
+       struct pt_cmd *cmd;
+};
+
+/*
+ * struct pt_passthru_engine - pass-through operation
+ *   without performing DMA mapping
+ * @mask: mask to be applied to data
+ * @mask_len: length in bytes of mask
+ * @src_dma: data to be used for this operation
+ * @dst_dma: data produced by this operation
+ * @src_len: length in bytes of data used for this operation
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ *   - src_dma, dst_dma, src_len
+ *   - mask, mask_len if a mask is to be applied
+ */
+struct pt_passthru_engine {
+       dma_addr_t mask;
+       u32 mask_len;           /* In bytes */
+
+       dma_addr_t src_dma, dst_dma;
+       u64 src_len;            /* In bytes */
+};
+
+/*
+ * struct pt_cmd - PTDMA operation request
+ * @entry: list element
+ * @work: work element used for callbacks
+ * @pt: PT device to be run on
+ * @ret: operation return code
+ * @engine: PTDMA operation to perform (passthru)
+ * @engine_error: PT engine return code
+ * @passthru: engine specific structures, refer to specific engine struct below
+ * @pt_cmd_callback: operation completion callback function
+ * @data: parameter value to be supplied to the callback function
+ *
+ * Variables required to be set when calling pt_enqueue_cmd():
+ *   - engine, pt_cmd_callback
+ *   - See the operation structures below for what is required for each
+ *     operation.
+ */
+struct pt_cmd {
+       struct list_head entry;
+       struct work_struct work;
+       struct pt_device *pt;
+       int ret;
+       u32 engine;
+       u32 engine_error;
+       struct pt_passthru_engine passthru;
+       /* Completion callback support */
+       void (*pt_cmd_callback)(void *data, int err);
+       void *data;
+};
+
+struct pt_dma_desc {
+       struct virt_dma_desc vd;
+       struct pt_device *pt;
+       enum dma_status status;
+       size_t len;
+       bool issued_to_hw;
+       struct pt_cmd pt_cmd;
+};
+
+struct pt_dma_chan {
+       struct virt_dma_chan vc;
+       struct pt_device *pt;
+};
+
+struct pt_cmd_queue {
+       struct pt_device *pt;
+
+       /* Queue dma pool */
+       struct dma_pool *dma_pool;
+
+       /* Queue base address (not necessarily aligned) */
+       struct ptdma_desc *qbase;
+
+       /* Aligned queue start address (per requirement) */
+       struct mutex q_mutex ____cacheline_aligned;
+       unsigned int qidx;
+
+       unsigned int qsize;
+       dma_addr_t qbase_dma;
+       dma_addr_t qdma_tail;
+
+       unsigned int active;
+       unsigned int suspended;
+
+       /* Register addresses for queue */
+       void __iomem *reg_control;
+       u32 qcontrol; /* Cached control register */
+
+       /* Status values from job */
+       u32 int_status;
+       u32 q_status;
+       u32 q_int_status;
+       u32 cmd_error;
+       /* Queue Statistics */
+       unsigned long total_pt_ops;
+} ____cacheline_aligned;
+
+struct pt_device {
+       struct list_head entry;
+
+       unsigned int ord;
+       char name[MAX_PT_NAME_LEN];
+
+       struct device *dev;
+
+       /* Bus specific device information */
+       struct pt_msix *pt_msix;
+
+       struct pt_dev_vdata *dev_vdata;
+
+       unsigned int pt_irq;
+
+       /* I/O area used for device communication */
+       void __iomem *io_regs;
+
+       spinlock_t cmd_lock ____cacheline_aligned;
+       unsigned int cmd_count;
+       struct list_head cmd;
+
+       /*
+        * The command queue. This represents the queue available on the
+        * PTDMA for processing cmds.
+        */
+       struct pt_cmd_queue cmd_q;
+
+       /* Support for the DMA Engine capabilities */
+       struct dma_device dma_dev;
+       struct pt_dma_chan *pt_dma_chan;
+       struct kmem_cache *dma_cmd_cache;
+       struct kmem_cache *dma_desc_cache;
+
+       wait_queue_head_t lsb_queue;
+
+       /* Device Statistics */
+       unsigned long total_interrupts;
+
+       struct pt_tasklet_data tdata;
+};
+
+/*
+ * descriptor for PTDMA commands
+ * 8 32-bit words:
+ * word 0: function; engine; control bits
+ * word 1: length of source data
+ * word 2: low 32 bits of source pointer
+ * word 3: upper 16 bits of source pointer; source memory type
+ * word 4: low 32 bits of destination pointer
+ * word 5: upper 16 bits of destination pointer; destination memory type
+ * word 6: reserved 32 bits
+ * word 7: reserved 32 bits
+ */
+
+#define DWORD0_SOC     BIT(0)
+#define DWORD0_IOC     BIT(1)
+
+struct dword3 {
+       unsigned int  src_hi:16;
+       unsigned int  src_mem:2;
+       unsigned int  lsb_cxt_id:8;
+       unsigned int  rsvd1:5;
+       unsigned int  fixed:1;
+};
+
+struct dword5 {
+       unsigned int  dst_hi:16;
+       unsigned int  dst_mem:2;
+       unsigned int  rsvd1:13;
+       unsigned int  fixed:1;
+};
+
+struct ptdma_desc {
+       u32 dw0;
+       u32 length;
+       u32 src_lo;
+       struct dword3 dw3;
+       u32 dst_lo;
+       struct dword5 dw5;
+       __le32 rsvd1;
+       __le32 rsvd2;
+};
+
+/* Structure to hold PT device data */
+struct pt_dev_vdata {
+       const unsigned int bar;
+};
+
+int pt_dmaengine_register(struct pt_device *pt);
+void pt_dmaengine_unregister(struct pt_device *pt);
+
+void ptdma_debugfs_setup(struct pt_device *pt);
+int pt_core_init(struct pt_device *pt);
+void pt_core_destroy(struct pt_device *pt);
+
+int pt_core_perform_passthru(struct pt_cmd_queue *cmd_q,
+                            struct pt_passthru_engine *pt_engine);
+
+void pt_start_queue(struct pt_cmd_queue *cmd_q);
+void pt_stop_queue(struct pt_cmd_queue *cmd_q);
+
+#endif
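The eight-word layout documented above splits 64-bit bus addresses across lo/hi fields, matching the 48-bit DMA mask set in the probe path. The sketch below illustrates that split only; example_fill_desc(), the use of CMD_DESC_DW0_VAL, and the untouched memory-type fields are assumptions based on the layout comment, while the driver's real fill logic lives in pt_core_perform_passthru(). The standard lower_32_bits()/upper_32_bits() helpers are assumed.

/* Illustrative sketch: populate a descriptor for a single passthru copy
 * per the word layout above. Anything beyond src/dst/length is assumed.
 */
static void example_fill_desc(struct ptdma_desc *desc,
			      dma_addr_t src, dma_addr_t dst, u32 len)
{
	memset(desc, 0, sizeof(*desc));
	desc->dw0 = CMD_DESC_DW0_VAL;			/* word 0 (assumed) */
	desc->length = len;				/* word 1 */
	desc->src_lo = lower_32_bits(src);		/* word 2 */
	desc->dw3.src_hi = upper_32_bits(src) & 0xffff;	/* word 3 */
	desc->dst_lo = lower_32_bits(dst);		/* word 4 */
	desc->dw5.dst_hi = upper_32_bits(dst) & 0xffff;	/* word 5 */
}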
index 1343732..a462962 100644 (file)
@@ -47,3 +47,12 @@ config RENESAS_USB_DMAC
        help
          This driver supports the USB-DMA controller found in the Renesas
          SoCs.
+
+config RZ_DMAC
+       tristate "Renesas RZ/G2L DMA Controller"
+       depends on ARCH_R9A07G044 || COMPILE_TEST
+       select RENESAS_DMA
+       select DMA_VIRTUAL_CHANNELS
+       help
+         This driver supports the general purpose DMA controller found in the
+         Renesas RZ/G2L SoC variants.
index abdf103..360ab6d 100644 (file)
@@ -15,3 +15,4 @@ obj-$(CONFIG_SH_DMAE) += shdma.o
 
 obj-$(CONFIG_RCAR_DMAC) += rcar-dmac.o
 obj-$(CONFIG_RENESAS_USB_DMAC) += usb-dmac.o
+obj-$(CONFIG_RZ_DMAC) += rz-dmac.o
diff --git a/drivers/dma/sh/rz-dmac.c b/drivers/dma/sh/rz-dmac.c
new file mode 100644 (file)
index 0000000..f9f30cb
--- /dev/null
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Renesas RZ/G2L DMA Controller Driver
+ *
+ * Based on imx-dma.c
+ *
+ * Copyright (C) 2021 Renesas Electronics Corp.
+ * Copyright 2010 Sascha Hauer, Pengutronix <s.hauer@pengutronix.de>
+ * Copyright 2012 Javier Martin, Vista Silicon <javier.martin@vista-silicon.com>
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_dma.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+enum rz_dmac_prep_type {
+       RZ_DMAC_DESC_MEMCPY,
+       RZ_DMAC_DESC_SLAVE_SG,
+};
+
+struct rz_lmdesc {
+       u32 header;
+       u32 sa;
+       u32 da;
+       u32 tb;
+       u32 chcfg;
+       u32 chitvl;
+       u32 chext;
+       u32 nxla;
+};
+
+struct rz_dmac_desc {
+       struct virt_dma_desc vd;
+       dma_addr_t src;
+       dma_addr_t dest;
+       size_t len;
+       struct list_head node;
+       enum dma_transfer_direction direction;
+       enum rz_dmac_prep_type type;
+       /* For slave sg */
+       struct scatterlist *sg;
+       unsigned int sgcount;
+};
+
+#define to_rz_dmac_desc(d)     container_of(d, struct rz_dmac_desc, vd)
+
+struct rz_dmac_chan {
+       struct virt_dma_chan vc;
+       void __iomem *ch_base;
+       void __iomem *ch_cmn_base;
+       unsigned int index;
+       int irq;
+       struct rz_dmac_desc *desc;
+       int descs_allocated;
+
+       enum dma_slave_buswidth src_word_size;
+       enum dma_slave_buswidth dst_word_size;
+       dma_addr_t src_per_address;
+       dma_addr_t dst_per_address;
+
+       u32 chcfg;
+       u32 chctrl;
+       int mid_rid;
+
+       struct list_head ld_free;
+       struct list_head ld_queue;
+       struct list_head ld_active;
+
+       struct {
+               struct rz_lmdesc *base;
+               struct rz_lmdesc *head;
+               struct rz_lmdesc *tail;
+               dma_addr_t base_dma;
+       } lmdesc;
+};
+
+#define to_rz_dmac_chan(c)     container_of(c, struct rz_dmac_chan, vc.chan)
+
+struct rz_dmac {
+       struct dma_device engine;
+       struct device *dev;
+       void __iomem *base;
+       void __iomem *ext_base;
+
+       unsigned int n_channels;
+       struct rz_dmac_chan *channels;
+
+       DECLARE_BITMAP(modules, 1024);
+};
+
+#define to_rz_dmac(d)  container_of(d, struct rz_dmac, engine)
+
+/*
+ * -----------------------------------------------------------------------------
+ * Registers
+ */
+
+#define CHSTAT                         0x0024
+#define CHCTRL                         0x0028
+#define CHCFG                          0x002c
+#define NXLA                           0x0038
+
+#define DCTRL                          0x0000
+
+#define EACH_CHANNEL_OFFSET            0x0040
+#define CHANNEL_0_7_OFFSET             0x0000
+#define CHANNEL_0_7_COMMON_BASE                0x0300
+#define CHANNEL_8_15_OFFSET            0x0400
+#define CHANNEL_8_15_COMMON_BASE       0x0700
+
+#define CHSTAT_ER                      BIT(4)
+#define CHSTAT_EN                      BIT(0)
+
+#define CHCTRL_CLRINTMSK               BIT(17)
+#define CHCTRL_CLRSUS                  BIT(9)
+#define CHCTRL_CLRTC                   BIT(6)
+#define CHCTRL_CLREND                  BIT(5)
+#define CHCTRL_CLRRQ                   BIT(4)
+#define CHCTRL_SWRST                   BIT(3)
+#define CHCTRL_STG                     BIT(2)
+#define CHCTRL_CLREN                   BIT(1)
+#define CHCTRL_SETEN                   BIT(0)
+#define CHCTRL_DEFAULT                 (CHCTRL_CLRINTMSK | CHCTRL_CLRSUS | \
+                                        CHCTRL_CLRTC | CHCTRL_CLREND | \
+                                        CHCTRL_CLRRQ | CHCTRL_SWRST | \
+                                        CHCTRL_CLREN)
+
+#define CHCFG_DMS                      BIT(31)
+#define CHCFG_DEM                      BIT(24)
+#define CHCFG_DAD                      BIT(21)
+#define CHCFG_SAD                      BIT(20)
+#define CHCFG_REQD                     BIT(3)
+#define CHCFG_SEL(bits)                        ((bits) & 0x07)
+#define CHCFG_MEM_COPY                 (0x80400008)
+#define CHCFG_FILL_DDS(a)              (((a) << 16) & GENMASK(19, 16))
+#define CHCFG_FILL_SDS(a)              (((a) << 12) & GENMASK(15, 12))
+#define CHCFG_FILL_TM(a)               (((a) & BIT(5)) << 22)
+#define CHCFG_FILL_AM(a)               (((a) & GENMASK(4, 2)) << 6)
+#define CHCFG_FILL_LVL(a)              (((a) & BIT(1)) << 5)
+#define CHCFG_FILL_HIEN(a)             (((a) & BIT(0)) << 5)
+
+#define MID_RID_MASK                   GENMASK(9, 0)
+#define CHCFG_MASK                     GENMASK(15, 10)
+#define CHCFG_DS_INVALID               0xFF
+#define DCTRL_LVINT                    BIT(1)
+#define DCTRL_PR                       BIT(0)
+#define DCTRL_DEFAULT                  (DCTRL_LVINT | DCTRL_PR)
+
+/* LINK MODE DESCRIPTOR */
+#define HEADER_LV                      BIT(0)
+
+#define RZ_DMAC_MAX_CHAN_DESCRIPTORS   16
+#define RZ_DMAC_MAX_CHANNELS           16
+#define DMAC_NR_LMDESC                 64
+
+/*
+ * -----------------------------------------------------------------------------
+ * Device access
+ */
+
+static void rz_dmac_writel(struct rz_dmac *dmac, unsigned int val,
+                          unsigned int offset)
+{
+       writel(val, dmac->base + offset);
+}
+
+static void rz_dmac_ext_writel(struct rz_dmac *dmac, unsigned int val,
+                              unsigned int offset)
+{
+       writel(val, dmac->ext_base + offset);
+}
+
+static u32 rz_dmac_ext_readl(struct rz_dmac *dmac, unsigned int offset)
+{
+       return readl(dmac->ext_base + offset);
+}
+
+static void rz_dmac_ch_writel(struct rz_dmac_chan *channel, unsigned int val,
+                             unsigned int offset, int which)
+{
+       if (which)
+               writel(val, channel->ch_base + offset);
+       else
+               writel(val, channel->ch_cmn_base + offset);
+}
+
+static u32 rz_dmac_ch_readl(struct rz_dmac_chan *channel,
+                           unsigned int offset, int which)
+{
+       if (which)
+               return readl(channel->ch_base + offset);
+       else
+               return readl(channel->ch_cmn_base + offset);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Initialization
+ */
+
+static void rz_lmdesc_setup(struct rz_dmac_chan *channel,
+                           struct rz_lmdesc *lmdesc)
+{
+       u32 nxla;
+
+       channel->lmdesc.base = lmdesc;
+       channel->lmdesc.head = lmdesc;
+       channel->lmdesc.tail = lmdesc;
+       nxla = channel->lmdesc.base_dma;
+       while (lmdesc < (channel->lmdesc.base + (DMAC_NR_LMDESC - 1))) {
+               lmdesc->header = 0;
+               nxla += sizeof(*lmdesc);
+               lmdesc->nxla = nxla;
+               lmdesc++;
+       }
+
+       lmdesc->header = 0;
+       lmdesc->nxla = channel->lmdesc.base_dma;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Descriptors preparation
+ */
+
+static void rz_dmac_lmdesc_recycle(struct rz_dmac_chan *channel)
+{
+       struct rz_lmdesc *lmdesc = channel->lmdesc.head;
+
+       while (!(lmdesc->header & HEADER_LV)) {
+               lmdesc->header = 0;
+               lmdesc++;
+               if (lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+                       lmdesc = channel->lmdesc.base;
+       }
+       channel->lmdesc.head = lmdesc;
+}
+
+static void rz_dmac_enable_hw(struct rz_dmac_chan *channel)
+{
+       struct dma_chan *chan = &channel->vc.chan;
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       unsigned long flags;
+       u32 nxla;
+       u32 chctrl;
+       u32 chstat;
+
+       dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+       local_irq_save(flags);
+
+       rz_dmac_lmdesc_recycle(channel);
+
+       nxla = channel->lmdesc.base_dma +
+               (sizeof(struct rz_lmdesc) * (channel->lmdesc.head -
+                                            channel->lmdesc.base));
+
+       chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+       if (!(chstat & CHSTAT_EN)) {
+               chctrl = (channel->chctrl | CHCTRL_SETEN);
+               rz_dmac_ch_writel(channel, nxla, NXLA, 1);
+               rz_dmac_ch_writel(channel, channel->chcfg, CHCFG, 1);
+               rz_dmac_ch_writel(channel, CHCTRL_SWRST, CHCTRL, 1);
+               rz_dmac_ch_writel(channel, chctrl, CHCTRL, 1);
+       }
+
+       local_irq_restore(flags);
+}
+
+static void rz_dmac_disable_hw(struct rz_dmac_chan *channel)
+{
+       struct dma_chan *chan = &channel->vc.chan;
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       unsigned long flags;
+
+       dev_dbg(dmac->dev, "%s channel %d\n", __func__, channel->index);
+
+       local_irq_save(flags);
+       rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+       local_irq_restore(flags);
+}
+
+static void rz_dmac_set_dmars_register(struct rz_dmac *dmac, int nr, u32 dmars)
+{
+       u32 dmars_offset = (nr / 2) * 4;
+       u32 shift = (nr % 2) * 16;
+       u32 dmars32;
+
+       dmars32 = rz_dmac_ext_readl(dmac, dmars_offset);
+       dmars32 &= ~(0xffff << shift);
+       dmars32 |= dmars << shift;
+
+       rz_dmac_ext_writel(dmac, dmars32, dmars_offset);
+}
+
+static void rz_dmac_prepare_desc_for_memcpy(struct rz_dmac_chan *channel)
+{
+       struct dma_chan *chan = &channel->vc.chan;
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct rz_lmdesc *lmdesc = channel->lmdesc.tail;
+       struct rz_dmac_desc *d = channel->desc;
+       u32 chcfg = CHCFG_MEM_COPY;
+
+       /* prepare descriptor */
+       lmdesc->sa = d->src;
+       lmdesc->da = d->dest;
+       lmdesc->tb = d->len;
+       lmdesc->chcfg = chcfg;
+       lmdesc->chitvl = 0;
+       lmdesc->chext = 0;
+       lmdesc->header = HEADER_LV;
+
+       rz_dmac_set_dmars_register(dmac, channel->index, 0);
+
+       channel->chcfg = chcfg;
+       channel->chctrl = CHCTRL_STG | CHCTRL_SETEN;
+}
+
+static void rz_dmac_prepare_descs_for_slave_sg(struct rz_dmac_chan *channel)
+{
+       struct dma_chan *chan = &channel->vc.chan;
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct rz_dmac_desc *d = channel->desc;
+       struct scatterlist *sg, *sgl = d->sg;
+       struct rz_lmdesc *lmdesc;
+       unsigned int i, sg_len = d->sgcount;
+
+       channel->chcfg |= CHCFG_SEL(channel->index) | CHCFG_DEM | CHCFG_DMS;
+
+       if (d->direction == DMA_DEV_TO_MEM) {
+               channel->chcfg |= CHCFG_SAD;
+               channel->chcfg &= ~CHCFG_REQD;
+       } else {
+               channel->chcfg |= CHCFG_DAD | CHCFG_REQD;
+       }
+
+       lmdesc = channel->lmdesc.tail;
+
+       for (i = 0, sg = sgl; i < sg_len; i++, sg = sg_next(sg)) {
+               if (d->direction == DMA_DEV_TO_MEM) {
+                       lmdesc->sa = channel->src_per_address;
+                       lmdesc->da = sg_dma_address(sg);
+               } else {
+                       lmdesc->sa = sg_dma_address(sg);
+                       lmdesc->da = channel->dst_per_address;
+               }
+
+               lmdesc->tb = sg_dma_len(sg);
+               lmdesc->chitvl = 0;
+               lmdesc->chext = 0;
+               if (i == (sg_len - 1)) {
+                       lmdesc->chcfg = (channel->chcfg & ~CHCFG_DEM);
+                       lmdesc->header = HEADER_LV;
+               } else {
+                       lmdesc->chcfg = channel->chcfg;
+                       lmdesc->header = HEADER_LV;
+               }
+               if (++lmdesc >= (channel->lmdesc.base + DMAC_NR_LMDESC))
+                       lmdesc = channel->lmdesc.base;
+       }
+
+       channel->lmdesc.tail = lmdesc;
+
+       rz_dmac_set_dmars_register(dmac, channel->index, channel->mid_rid);
+       channel->chctrl = CHCTRL_SETEN;
+}
+
+static int rz_dmac_xfer_desc(struct rz_dmac_chan *chan)
+{
+       struct rz_dmac_desc *d = chan->desc;
+       struct virt_dma_desc *vd;
+
+       vd = vchan_next_desc(&chan->vc);
+       if (!vd)
+               return 0;
+
+       list_del(&vd->node);
+
+       switch (d->type) {
+       case RZ_DMAC_DESC_MEMCPY:
+               rz_dmac_prepare_desc_for_memcpy(chan);
+               break;
+
+       case RZ_DMAC_DESC_SLAVE_SG:
+               rz_dmac_prepare_descs_for_slave_sg(chan);
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       rz_dmac_enable_hw(chan);
+
+       return 0;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * DMA engine operations
+ */
+
+static int rz_dmac_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+
+       while (channel->descs_allocated < RZ_DMAC_MAX_CHAN_DESCRIPTORS) {
+               struct rz_dmac_desc *desc;
+
+               desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+               if (!desc)
+                       break;
+
+               list_add_tail(&desc->node, &channel->ld_free);
+               channel->descs_allocated++;
+       }
+
+       if (!channel->descs_allocated)
+               return -ENOMEM;
+
+       return channel->descs_allocated;
+}
+
+static void rz_dmac_free_chan_resources(struct dma_chan *chan)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct rz_lmdesc *lmdesc = channel->lmdesc.base;
+       struct rz_dmac_desc *desc, *_desc;
+       unsigned long flags;
+       unsigned int i;
+
+       spin_lock_irqsave(&channel->vc.lock, flags);
+
+       for (i = 0; i < DMAC_NR_LMDESC; i++)
+               lmdesc[i].header = 0;
+
+       rz_dmac_disable_hw(channel);
+       list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+       list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+
+       if (channel->mid_rid >= 0) {
+               clear_bit(channel->mid_rid, dmac->modules);
+               channel->mid_rid = -EINVAL;
+       }
+
+       spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+       list_for_each_entry_safe(desc, _desc, &channel->ld_free, node) {
+               kfree(desc);
+               channel->descs_allocated--;
+       }
+
+       INIT_LIST_HEAD(&channel->ld_free);
+       vchan_free_chan_resources(&channel->vc);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+                       size_t len, unsigned long flags)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct rz_dmac_desc *desc;
+
+       dev_dbg(dmac->dev, "%s channel: %d src=0x%pad dst=0x%pad len=%zu\n",
+               __func__, channel->index, &src, &dest, len);
+
+       if (list_empty(&channel->ld_free))
+               return NULL;
+
+       desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+       desc->type = RZ_DMAC_DESC_MEMCPY;
+       desc->src = src;
+       desc->dest = dest;
+       desc->len = len;
+       desc->direction = DMA_MEM_TO_MEM;
+
+       list_move_tail(channel->ld_free.next, &channel->ld_queue);
+       return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static struct dma_async_tx_descriptor *
+rz_dmac_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+                     unsigned int sg_len,
+                     enum dma_transfer_direction direction,
+                     unsigned long flags, void *context)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       struct rz_dmac_desc *desc;
+       struct scatterlist *sg;
+       int dma_length = 0;
+       int i = 0;
+
+       if (list_empty(&channel->ld_free))
+               return NULL;
+
+       desc = list_first_entry(&channel->ld_free, struct rz_dmac_desc, node);
+
+       for_each_sg(sgl, sg, sg_len, i) {
+               dma_length += sg_dma_len(sg);
+       }
+
+       desc->type = RZ_DMAC_DESC_SLAVE_SG;
+       desc->sg = sgl;
+       desc->sgcount = sg_len;
+       desc->len = dma_length;
+       desc->direction = direction;
+
+       if (direction == DMA_DEV_TO_MEM)
+               desc->src = channel->src_per_address;
+       else
+               desc->dest = channel->dst_per_address;
+
+       list_move_tail(channel->ld_free.next, &channel->ld_queue);
+       return vchan_tx_prep(&channel->vc, &desc->vd, flags);
+}
+
+static int rz_dmac_terminate_all(struct dma_chan *chan)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       rz_dmac_disable_hw(channel);
+       spin_lock_irqsave(&channel->vc.lock, flags);
+       list_splice_tail_init(&channel->ld_active, &channel->ld_free);
+       list_splice_tail_init(&channel->ld_queue, &channel->ld_free);
+       spin_unlock_irqrestore(&channel->vc.lock, flags);
+       vchan_get_all_descriptors(&channel->vc, &head);
+       vchan_dma_desc_free_list(&channel->vc, &head);
+
+       return 0;
+}
+
+static void rz_dmac_issue_pending(struct dma_chan *chan)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct rz_dmac_desc *desc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&channel->vc.lock, flags);
+
+       if (!list_empty(&channel->ld_queue)) {
+               desc = list_first_entry(&channel->ld_queue,
+                                       struct rz_dmac_desc, node);
+               channel->desc = desc;
+               if (vchan_issue_pending(&channel->vc)) {
+                       if (rz_dmac_xfer_desc(channel) < 0)
+                               dev_warn(dmac->dev, "ch: %d couldn't issue DMA xfer\n",
+                                        channel->index);
+                       else
+                               list_move_tail(channel->ld_queue.next,
+                                              &channel->ld_active);
+               }
+       }
+
+       spin_unlock_irqrestore(&channel->vc.lock, flags);
+}
+
+static u8 rz_dmac_ds_to_val_mapping(enum dma_slave_buswidth ds)
+{
+       u8 i;
+       const enum dma_slave_buswidth ds_lut[] = {
+               DMA_SLAVE_BUSWIDTH_1_BYTE,
+               DMA_SLAVE_BUSWIDTH_2_BYTES,
+               DMA_SLAVE_BUSWIDTH_4_BYTES,
+               DMA_SLAVE_BUSWIDTH_8_BYTES,
+               DMA_SLAVE_BUSWIDTH_16_BYTES,
+               DMA_SLAVE_BUSWIDTH_32_BYTES,
+               DMA_SLAVE_BUSWIDTH_64_BYTES,
+               DMA_SLAVE_BUSWIDTH_128_BYTES,
+       };
+
+       for (i = 0; i < ARRAY_SIZE(ds_lut); i++) {
+               if (ds_lut[i] == ds)
+                       return i;
+       }
+
+       return CHCFG_DS_INVALID;
+}
+
+static int rz_dmac_config(struct dma_chan *chan,
+                         struct dma_slave_config *config)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       u32 val;
+
+       channel->src_per_address = config->src_addr;
+       channel->src_word_size = config->src_addr_width;
+       channel->dst_per_address = config->dst_addr;
+       channel->dst_word_size = config->dst_addr_width;
+
+       val = rz_dmac_ds_to_val_mapping(config->dst_addr_width);
+       if (val == CHCFG_DS_INVALID)
+               return -EINVAL;
+
+       channel->chcfg |= CHCFG_FILL_DDS(val);
+
+       val = rz_dmac_ds_to_val_mapping(config->src_addr_width);
+       if (val == CHCFG_DS_INVALID)
+               return -EINVAL;
+
+       channel->chcfg |= CHCFG_FILL_SDS(val);
+
+       return 0;
+}
+
+static void rz_dmac_virt_desc_free(struct virt_dma_desc *vd)
+{
+       /*
+        * Placeholder.
+        * Descriptors are allocated during alloc_chan_resources and freed
+        * during free_chan_resources. Lists are used to manage the
+        * descriptors and avoid any memory allocation/free during DMA
+        * read/write.
+        */
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * IRQ handling
+ */
+
+static void rz_dmac_irq_handle_channel(struct rz_dmac_chan *channel)
+{
+       struct dma_chan *chan = &channel->vc.chan;
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       u32 chstat, chctrl;
+
+       chstat = rz_dmac_ch_readl(channel, CHSTAT, 1);
+       if (chstat & CHSTAT_ER) {
+               dev_err(dmac->dev, "DMAC err CHSTAT_%d = %08X\n",
+                       channel->index, chstat);
+               rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+               goto done;
+       }
+
+       chctrl = rz_dmac_ch_readl(channel, CHCTRL, 1);
+       rz_dmac_ch_writel(channel, chctrl | CHCTRL_CLREND, CHCTRL, 1);
+done:
+       return;
+}
+
+static irqreturn_t rz_dmac_irq_handler(int irq, void *dev_id)
+{
+       struct rz_dmac_chan *channel = dev_id;
+
+       if (channel) {
+               rz_dmac_irq_handle_channel(channel);
+               return IRQ_WAKE_THREAD;
+       }
+       /* handle DMAERR irq */
+       return IRQ_HANDLED;
+}
+
+static irqreturn_t rz_dmac_irq_handler_thread(int irq, void *dev_id)
+{
+       struct rz_dmac_chan *channel = dev_id;
+       struct rz_dmac_desc *desc = NULL;
+       unsigned long flags;
+
+       spin_lock_irqsave(&channel->vc.lock, flags);
+
+       if (list_empty(&channel->ld_active)) {
+               /* Someone might have called terminate all */
+               goto out;
+       }
+
+       desc = list_first_entry(&channel->ld_active, struct rz_dmac_desc, node);
+       vchan_cookie_complete(&desc->vd);
+       list_move_tail(channel->ld_active.next, &channel->ld_free);
+       if (!list_empty(&channel->ld_queue)) {
+               desc = list_first_entry(&channel->ld_queue, struct rz_dmac_desc,
+                                       node);
+               channel->desc = desc;
+               if (rz_dmac_xfer_desc(channel) == 0)
+                       list_move_tail(channel->ld_queue.next, &channel->ld_active);
+       }
+out:
+       spin_unlock_irqrestore(&channel->vc.lock, flags);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * OF xlate and channel filter
+ */
+
+static bool rz_dmac_chan_filter(struct dma_chan *chan, void *arg)
+{
+       struct rz_dmac_chan *channel = to_rz_dmac_chan(chan);
+       struct rz_dmac *dmac = to_rz_dmac(chan->device);
+       struct of_phandle_args *dma_spec = arg;
+       u32 ch_cfg;
+
+       channel->mid_rid = dma_spec->args[0] & MID_RID_MASK;
+       ch_cfg = (dma_spec->args[0] & CHCFG_MASK) >> 10;
+       channel->chcfg = CHCFG_FILL_TM(ch_cfg) | CHCFG_FILL_AM(ch_cfg) |
+                        CHCFG_FILL_LVL(ch_cfg) | CHCFG_FILL_HIEN(ch_cfg);
+
+       return !test_and_set_bit(channel->mid_rid, dmac->modules);
+}
+
+static struct dma_chan *rz_dmac_of_xlate(struct of_phandle_args *dma_spec,
+                                        struct of_dma *ofdma)
+{
+       dma_cap_mask_t mask;
+
+       if (dma_spec->args_count != 1)
+               return NULL;
+
+       /* Only slave DMA channels can be allocated via DT */
+       dma_cap_zero(mask);
+       dma_cap_set(DMA_SLAVE, mask);
+
+       return dma_request_channel(mask, rz_dmac_chan_filter, dma_spec);
+}
+
+/*
+ * -----------------------------------------------------------------------------
+ * Probe and remove
+ */
+
+static int rz_dmac_chan_probe(struct rz_dmac *dmac,
+                             struct rz_dmac_chan *channel,
+                             unsigned int index)
+{
+       struct platform_device *pdev = to_platform_device(dmac->dev);
+       struct rz_lmdesc *lmdesc;
+       char pdev_irqname[5];
+       char *irqname;
+       int ret;
+
+       channel->index = index;
+       channel->mid_rid = -EINVAL;
+
+       /* Request the channel interrupt. */
+       sprintf(pdev_irqname, "ch%u", index);
+       channel->irq = platform_get_irq_byname(pdev, pdev_irqname);
+       if (channel->irq < 0)
+               return channel->irq;
+
+       irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
+                                dev_name(dmac->dev), index);
+       if (!irqname)
+               return -ENOMEM;
+
+       ret = devm_request_threaded_irq(dmac->dev, channel->irq,
+                                       rz_dmac_irq_handler,
+                                       rz_dmac_irq_handler_thread, 0,
+                                       irqname, channel);
+       if (ret) {
+               dev_err(dmac->dev, "failed to request IRQ %u (%d)\n",
+                       channel->irq, ret);
+               return ret;
+       }
+
+       /* Set io base address for each channel */
+       if (index < 8) {
+               channel->ch_base = dmac->base + CHANNEL_0_7_OFFSET +
+                       EACH_CHANNEL_OFFSET * index;
+               channel->ch_cmn_base = dmac->base + CHANNEL_0_7_COMMON_BASE;
+       } else {
+               channel->ch_base = dmac->base + CHANNEL_8_15_OFFSET +
+                       EACH_CHANNEL_OFFSET * (index - 8);
+               channel->ch_cmn_base = dmac->base + CHANNEL_8_15_COMMON_BASE;
+       }
+
+       /* Allocate descriptors */
+       lmdesc = dma_alloc_coherent(&pdev->dev,
+                                   sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+                                   &channel->lmdesc.base_dma, GFP_KERNEL);
+       if (!lmdesc) {
+               dev_err(&pdev->dev, "Can't allocate memory (lmdesc)\n");
+               return -ENOMEM;
+       }
+       rz_lmdesc_setup(channel, lmdesc);
+
+       /* Initialize register for each channel */
+       rz_dmac_ch_writel(channel, CHCTRL_DEFAULT, CHCTRL, 1);
+
+       channel->vc.desc_free = rz_dmac_virt_desc_free;
+       vchan_init(&channel->vc, &dmac->engine);
+       INIT_LIST_HEAD(&channel->ld_queue);
+       INIT_LIST_HEAD(&channel->ld_free);
+       INIT_LIST_HEAD(&channel->ld_active);
+
+       return 0;
+}
+
+static int rz_dmac_parse_of(struct device *dev, struct rz_dmac *dmac)
+{
+       struct device_node *np = dev->of_node;
+       int ret;
+
+       ret = of_property_read_u32(np, "dma-channels", &dmac->n_channels);
+       if (ret < 0) {
+               dev_err(dev, "unable to read dma-channels property\n");
+               return ret;
+       }
+
+       if (!dmac->n_channels || dmac->n_channels > RZ_DMAC_MAX_CHANNELS) {
+               dev_err(dev, "invalid number of channels %u\n", dmac->n_channels);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+static int rz_dmac_probe(struct platform_device *pdev)
+{
+       const char *irqname = "error";
+       struct dma_device *engine;
+       struct rz_dmac *dmac;
+       int channel_num;
+       unsigned int i;
+       int ret;
+       int irq;
+
+       dmac = devm_kzalloc(&pdev->dev, sizeof(*dmac), GFP_KERNEL);
+       if (!dmac)
+               return -ENOMEM;
+
+       dmac->dev = &pdev->dev;
+       platform_set_drvdata(pdev, dmac);
+
+       ret = rz_dmac_parse_of(&pdev->dev, dmac);
+       if (ret < 0)
+               return ret;
+
+       dmac->channels = devm_kcalloc(&pdev->dev, dmac->n_channels,
+                                     sizeof(*dmac->channels), GFP_KERNEL);
+       if (!dmac->channels)
+               return -ENOMEM;
+
+       /* Request resources */
+       dmac->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(dmac->base))
+               return PTR_ERR(dmac->base);
+
+       dmac->ext_base = devm_platform_ioremap_resource(pdev, 1);
+       if (IS_ERR(dmac->ext_base))
+               return PTR_ERR(dmac->ext_base);
+
+       /* Register interrupt handler for error */
+       irq = platform_get_irq_byname(pdev, irqname);
+       if (irq < 0)
+               return irq;
+
+       ret = devm_request_irq(&pdev->dev, irq, rz_dmac_irq_handler, 0,
+                              irqname, NULL);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to request IRQ %u (%d)\n",
+                       irq, ret);
+               return ret;
+       }
+
+       /* Initialize the channels. */
+       INIT_LIST_HEAD(&dmac->engine.channels);
+
+       for (i = 0; i < dmac->n_channels; i++) {
+               ret = rz_dmac_chan_probe(dmac, &dmac->channels[i], i);
+               if (ret < 0)
+                       goto err;
+       }
+
+       /* Register the DMAC as a DMA provider for DT. */
+       ret = of_dma_controller_register(pdev->dev.of_node, rz_dmac_of_xlate,
+                                        NULL);
+       if (ret < 0)
+               goto err;
+
+       /* Register the DMA engine device. */
+       engine = &dmac->engine;
+       dma_cap_set(DMA_SLAVE, engine->cap_mask);
+       dma_cap_set(DMA_MEMCPY, engine->cap_mask);
+       rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_0_7_COMMON_BASE + DCTRL);
+       rz_dmac_writel(dmac, DCTRL_DEFAULT, CHANNEL_8_15_COMMON_BASE + DCTRL);
+
+       engine->dev = &pdev->dev;
+
+       engine->device_alloc_chan_resources = rz_dmac_alloc_chan_resources;
+       engine->device_free_chan_resources = rz_dmac_free_chan_resources;
+       engine->device_tx_status = dma_cookie_status;
+       engine->device_prep_slave_sg = rz_dmac_prep_slave_sg;
+       engine->device_prep_dma_memcpy = rz_dmac_prep_dma_memcpy;
+       engine->device_config = rz_dmac_config;
+       engine->device_terminate_all = rz_dmac_terminate_all;
+       engine->device_issue_pending = rz_dmac_issue_pending;
+
+       engine->copy_align = DMAENGINE_ALIGN_1_BYTE;
+       dma_set_max_seg_size(engine->dev, U32_MAX);
+
+       ret = dma_async_device_register(engine);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "unable to register\n");
+               goto dma_register_err;
+       }
+       return 0;
+
+dma_register_err:
+       of_dma_controller_free(pdev->dev.of_node);
+err:
+       channel_num = i;
+       for (i = 0; i < channel_num; i++) {
+               struct rz_dmac_chan *channel = &dmac->channels[i];
+
+               dma_free_coherent(&pdev->dev,
+                                 sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+                                 channel->lmdesc.base,
+                                 channel->lmdesc.base_dma);
+       }
+
+       return ret;
+}
+
+static int rz_dmac_remove(struct platform_device *pdev)
+{
+       struct rz_dmac *dmac = platform_get_drvdata(pdev);
+       unsigned int i;
+
+       for (i = 0; i < dmac->n_channels; i++) {
+               struct rz_dmac_chan *channel = &dmac->channels[i];
+
+               dma_free_coherent(&pdev->dev,
+                                 sizeof(struct rz_lmdesc) * DMAC_NR_LMDESC,
+                                 channel->lmdesc.base,
+                                 channel->lmdesc.base_dma);
+       }
+       of_dma_controller_free(pdev->dev.of_node);
+       dma_async_device_unregister(&dmac->engine);
+
+       return 0;
+}
+
+static const struct of_device_id of_rz_dmac_match[] = {
+       { .compatible = "renesas,rz-dmac", },
+       { /* Sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, of_rz_dmac_match);
+
+static struct platform_driver rz_dmac_driver = {
+       .driver         = {
+               .name   = "rz-dmac",
+               .of_match_table = of_rz_dmac_match,
+       },
+       .probe          = rz_dmac_probe,
+       .remove         = rz_dmac_remove,
+};
+
+module_platform_driver(rz_dmac_driver);
+
+MODULE_DESCRIPTION("Renesas RZ/G2L DMA Controller Driver");
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_LICENSE("GPL v2");
index 1cc0690..5edaeb8 100644 (file)
@@ -466,7 +466,7 @@ static int usb_dmac_chan_terminate_all(struct dma_chan *chan)
 
 static unsigned int usb_dmac_get_current_residue(struct usb_dmac_chan *chan,
                                                 struct usb_dmac_desc *desc,
-                                                int sg_index)
+                                                unsigned int sg_index)
 {
        struct usb_dmac_sg *sg = desc->sg + sg_index;
        u32 mem_addr = sg->mem_addr & 0xffffffff;
index 0ef5ca8..4357d23 100644 (file)
@@ -1265,6 +1265,7 @@ static const struct of_device_id sprd_dma_match[] = {
        { .compatible = "sprd,sc9860-dma", },
        {},
 };
+MODULE_DEVICE_TABLE(of, sprd_dma_match);
 
 static int __maybe_unused sprd_dma_runtime_suspend(struct device *dev)
 {
index 7dd1d3d..9063c72 100644 (file)
@@ -60,6 +60,7 @@
 #define STM32_DMA_SCR_PSIZE_GET(n)     ((n & STM32_DMA_SCR_PSIZE_MASK) >> 11)
 #define STM32_DMA_SCR_DIR_MASK         GENMASK(7, 6)
 #define STM32_DMA_SCR_DIR(n)           ((n & 0x3) << 6)
+#define STM32_DMA_SCR_TRBUFF           BIT(20) /* Bufferable transfer for USART/UART */
 #define STM32_DMA_SCR_CT               BIT(19) /* Target in double buffer */
 #define STM32_DMA_SCR_DBM              BIT(18) /* Double Buffer Mode */
 #define STM32_DMA_SCR_PINCOS           BIT(15) /* Peripheral inc offset size */
 #define STM32_DMA_THRESHOLD_FTR_MASK   GENMASK(1, 0)
 #define STM32_DMA_THRESHOLD_FTR_GET(n) ((n) & STM32_DMA_THRESHOLD_FTR_MASK)
 #define STM32_DMA_DIRECT_MODE_MASK     BIT(2)
-#define STM32_DMA_DIRECT_MODE_GET(n)   (((n) & STM32_DMA_DIRECT_MODE_MASK) \
-                                        >> 2)
+#define STM32_DMA_DIRECT_MODE_GET(n)   (((n) & STM32_DMA_DIRECT_MODE_MASK) >> 2)
+#define STM32_DMA_ALT_ACK_MODE_MASK    BIT(4)
+#define STM32_DMA_ALT_ACK_MODE_GET(n)  (((n) & STM32_DMA_ALT_ACK_MODE_MASK) >> 4)
 
 enum stm32_dma_width {
        STM32_DMA_BYTE,
@@ -1252,6 +1254,8 @@ static void stm32_dma_set_config(struct stm32_dma_chan *chan,
        chan->threshold = STM32_DMA_THRESHOLD_FTR_GET(cfg->features);
        if (STM32_DMA_DIRECT_MODE_GET(cfg->features))
                chan->threshold = STM32_DMA_FIFO_THRESHOLD_NONE;
+       if (STM32_DMA_ALT_ACK_MODE_GET(cfg->features))
+               chan->chan_reg.dma_scr |= STM32_DMA_SCR_TRBUFF;
 }
 
 static struct dma_chan *stm32_dma_of_xlate(struct of_phandle_args *dma_spec,
index 4735742..b1115a6 100644 (file)
@@ -655,9 +655,8 @@ static int tegra_adma_alloc_chan_resources(struct dma_chan *dc)
                return ret;
        }
 
-       ret = pm_runtime_get_sync(tdc2dev(tdc));
+       ret = pm_runtime_resume_and_get(tdc2dev(tdc));
        if (ret < 0) {
-               pm_runtime_put_noidle(tdc2dev(tdc));
                free_irq(tdc->irq, tdc);
                return ret;
        }
@@ -869,10 +868,8 @@ static int tegra_adma_probe(struct platform_device *pdev)
        pm_runtime_enable(&pdev->dev);
 
        ret = pm_runtime_get_sync(&pdev->dev);
-       if (ret < 0) {
-               pm_runtime_put_noidle(&pdev->dev);
+       if (ret < 0)
                goto rpm_disable;
-       }
 
        ret = tegra_adma_init(tdma);
        if (ret)
index 7580870..34e3fc5 100644 (file)
                },                                      \
        }
 
+#define PSIL_CSI2RX(x)                                 \
+       {                                               \
+               .thread_id = x,                         \
+               .ep_config = {                          \
+                       .ep_type = PSIL_EP_NATIVE,      \
+               },                                      \
+       }
+
 /* PSI-L source thread IDs, used for RX (DMA_DEV_TO_MEM) */
 static struct psil_ep j721e_src_ep_map[] = {
        /* SA2UL */
@@ -138,6 +146,71 @@ static struct psil_ep j721e_src_ep_map[] = {
        PSIL_PDMA_XY_PKT(0x4707),
        PSIL_PDMA_XY_PKT(0x4708),
        PSIL_PDMA_XY_PKT(0x4709),
+       /* CSI2RX */
+       PSIL_CSI2RX(0x4940),
+       PSIL_CSI2RX(0x4941),
+       PSIL_CSI2RX(0x4942),
+       PSIL_CSI2RX(0x4943),
+       PSIL_CSI2RX(0x4944),
+       PSIL_CSI2RX(0x4945),
+       PSIL_CSI2RX(0x4946),
+       PSIL_CSI2RX(0x4947),
+       PSIL_CSI2RX(0x4948),
+       PSIL_CSI2RX(0x4949),
+       PSIL_CSI2RX(0x494a),
+       PSIL_CSI2RX(0x494b),
+       PSIL_CSI2RX(0x494c),
+       PSIL_CSI2RX(0x494d),
+       PSIL_CSI2RX(0x494e),
+       PSIL_CSI2RX(0x494f),
+       PSIL_CSI2RX(0x4950),
+       PSIL_CSI2RX(0x4951),
+       PSIL_CSI2RX(0x4952),
+       PSIL_CSI2RX(0x4953),
+       PSIL_CSI2RX(0x4954),
+       PSIL_CSI2RX(0x4955),
+       PSIL_CSI2RX(0x4956),
+       PSIL_CSI2RX(0x4957),
+       PSIL_CSI2RX(0x4958),
+       PSIL_CSI2RX(0x4959),
+       PSIL_CSI2RX(0x495a),
+       PSIL_CSI2RX(0x495b),
+       PSIL_CSI2RX(0x495c),
+       PSIL_CSI2RX(0x495d),
+       PSIL_CSI2RX(0x495e),
+       PSIL_CSI2RX(0x495f),
+       PSIL_CSI2RX(0x4960),
+       PSIL_CSI2RX(0x4961),
+       PSIL_CSI2RX(0x4962),
+       PSIL_CSI2RX(0x4963),
+       PSIL_CSI2RX(0x4964),
+       PSIL_CSI2RX(0x4965),
+       PSIL_CSI2RX(0x4966),
+       PSIL_CSI2RX(0x4967),
+       PSIL_CSI2RX(0x4968),
+       PSIL_CSI2RX(0x4969),
+       PSIL_CSI2RX(0x496a),
+       PSIL_CSI2RX(0x496b),
+       PSIL_CSI2RX(0x496c),
+       PSIL_CSI2RX(0x496d),
+       PSIL_CSI2RX(0x496e),
+       PSIL_CSI2RX(0x496f),
+       PSIL_CSI2RX(0x4970),
+       PSIL_CSI2RX(0x4971),
+       PSIL_CSI2RX(0x4972),
+       PSIL_CSI2RX(0x4973),
+       PSIL_CSI2RX(0x4974),
+       PSIL_CSI2RX(0x4975),
+       PSIL_CSI2RX(0x4976),
+       PSIL_CSI2RX(0x4977),
+       PSIL_CSI2RX(0x4978),
+       PSIL_CSI2RX(0x4979),
+       PSIL_CSI2RX(0x497a),
+       PSIL_CSI2RX(0x497b),
+       PSIL_CSI2RX(0x497c),
+       PSIL_CSI2RX(0x497d),
+       PSIL_CSI2RX(0x497e),
+       PSIL_CSI2RX(0x497f),
        /* CPSW9 */
        PSIL_ETHERNET(0x4a00),
        /* CPSW0 */
index 4b9530a..a4450bc 100644 (file)
@@ -1420,8 +1420,7 @@ static void xilinx_vdma_start_transfer(struct xilinx_dma_chan *chan)
 
        chan->desc_submitcount++;
        chan->desc_pendingcount--;
-       list_del(&desc->node);
-       list_add_tail(&desc->node, &chan->active_list);
+       list_move_tail(&desc->node, &chan->active_list);
        if (chan->desc_submitcount == chan->num_frms)
                chan->desc_submitcount = 0;
 
@@ -1658,6 +1657,17 @@ static void xilinx_dma_issue_pending(struct dma_chan *dchan)
        spin_unlock_irqrestore(&chan->lock, flags);
 }
 
+/**
+ * xilinx_dma_device_config - Configure the DMA channel
+ * @dchan: DMA channel
+ * @config: channel configuration
+ */
+static int xilinx_dma_device_config(struct dma_chan *dchan,
+                                   struct dma_slave_config *config)
+{
+       return 0;
+}
+
 /**
  * xilinx_dma_complete_descriptor - Mark the active descriptor as complete
  * @chan : xilinx DMA channel
@@ -3077,7 +3087,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
                xdev->ext_addr = false;
 
        /* Set the dma mask bits */
-       dma_set_mask(xdev->dev, DMA_BIT_MASK(addr_width));
+       dma_set_mask_and_coherent(xdev->dev, DMA_BIT_MASK(addr_width));
 
        /* Initialize the DMA engine */
        xdev->common.dev = &pdev->dev;
@@ -3096,6 +3106,7 @@ static int xilinx_dma_probe(struct platform_device *pdev)
        xdev->common.device_synchronize = xilinx_dma_synchronize;
        xdev->common.device_tx_status = xilinx_dma_tx_status;
        xdev->common.device_issue_pending = xilinx_dma_issue_pending;
+       xdev->common.device_config = xilinx_dma_device_config;
        if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
                dma_cap_set(DMA_CYCLIC, xdev->common.cap_mask);
                xdev->common.device_prep_slave_sg = xilinx_dma_prep_slave_sg;
index 5fecf5a..97f02f8 100644 (file)
@@ -434,8 +434,7 @@ static void zynqmp_dma_free_descriptor(struct zynqmp_dma_chan *chan,
        struct zynqmp_dma_desc_sw *child, *next;
 
        chan->desc_free_cnt++;
-       list_del(&sdesc->node);
-       list_add_tail(&sdesc->node, &chan->free_list);
+       list_move_tail(&sdesc->node, &chan->free_list);
        list_for_each_entry_safe(child, next, &sdesc->tx_list, node) {
                chan->desc_free_cnt++;
                list_move_tail(&child->node, &chan->free_list);
index 8f53837..97178b3 100644 (file)
@@ -468,14 +468,18 @@ bool amdgpu_atomfirmware_dynamic_boot_config_supported(struct amdgpu_device *ade
        return (fw_cap & ATOM_FIRMWARE_CAP_DYNAMIC_BOOT_CFG_ENABLE) ? true : false;
 }
 
-/*
- * Helper function to query RAS EEPROM address
- *
- * @adev: amdgpu_device pointer
+/**
+ * amdgpu_atomfirmware_ras_rom_addr - Get the RAS EEPROM addr from VBIOS
+ * @adev: amdgpu_device pointer
+ * @i2c_address: pointer to u8; if not NULL, will contain
+ *    the RAS EEPROM address if the function returns true
  *
- * Return true if vbios supports ras rom address reporting
+ * Return true if VBIOS supports RAS EEPROM address reporting,
+ * else return false. If true and @i2c_address is not NULL,
+ * will contain the RAS ROM address.
  */
-bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_address)
+bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev,
+                                     u8 *i2c_address)
 {
        struct amdgpu_mode_info *mode_info = &adev->mode_info;
        int index;
@@ -483,27 +487,39 @@ bool amdgpu_atomfirmware_ras_rom_addr(struct amdgpu_device *adev, uint8_t* i2c_a
        union firmware_info *firmware_info;
        u8 frev, crev;
 
-       if (i2c_address == NULL)
-               return false;
-
-       *i2c_address = 0;
-
        index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
-                       firmwareinfo);
+                                           firmwareinfo);
 
        if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context,
-                               index, &size, &frev, &crev, &data_offset)) {
+                                         index, &size, &frev, &crev,
+                                         &data_offset)) {
                /* support firmware_info 3.4 + */
                if ((frev == 3 && crev >=4) || (frev > 3)) {
                        firmware_info = (union firmware_info *)
                                (mode_info->atom_context->bios + data_offset);
-                       *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+                       /* The ras_rom_i2c_slave_addr should ideally
+                        * be a 19-bit EEPROM address, which would be
+                        * used as is by the driver; see top of
+                        * amdgpu_eeprom.c.
+                        *
+                        * When this is the case, 0 is of course a
+                        * valid RAS EEPROM address, in which case,
+                        * we'll drop the first "if (firm...)" and only
+                        * leave the check for the pointer.
+                        *
+                        * The reason this works right now is because
+                        * ras_rom_i2c_slave_addr contains the EEPROM
+                        * device type qualifier 1010b in the top 4
+                        * bits.
+                        */
+                       if (firmware_info->v34.ras_rom_i2c_slave_addr) {
+                               if (i2c_address)
+                                       *i2c_address = firmware_info->v34.ras_rom_i2c_slave_addr;
+                               return true;
+                       }
                }
        }
 
-       if (*i2c_address != 0)
-               return true;
-
        return false;
 }
 
index 8e5a7ac..7a73167 100644 (file)
@@ -522,6 +522,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
                        break;
                case CHIP_RENOIR:
                case CHIP_VANGOGH:
+               case CHIP_YELLOW_CARP:
                        domain |= AMDGPU_GEM_DOMAIN_GTT;
                        break;
 
index b664029..f18240f 100644 (file)
@@ -1181,7 +1181,12 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x73A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73A5, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73A8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73A9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73AC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73AD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
@@ -1197,6 +1202,11 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x73C0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
        {0x1002, 0x73C1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
        {0x1002, 0x73C3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+       {0x1002, 0x73DA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+       {0x1002, 0x73DB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+       {0x1002, 0x73DC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+       {0x1002, 0x73DD, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
+       {0x1002, 0x73DE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
        {0x1002, 0x73DF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVY_FLOUNDER},
 
        /* DIMGREY_CAVEFISH */
@@ -1204,6 +1214,13 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x73E1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
        {0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
        {0x1002, 0x73E3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73E8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73E9, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73EA, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73EB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73EC, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73ED, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+       {0x1002, 0x73EF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
        {0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
 
        /* Aldebaran */
index d94c541..5a6857c 100644 (file)
@@ -59,6 +59,7 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
        uint64_t vram_mem = 0, gtt_mem = 0, cpu_mem = 0;
        struct drm_file *file = f->private_data;
        struct amdgpu_device *adev = drm_to_adev(file->minor->dev);
+       struct amdgpu_bo *root;
        int ret;
 
        ret = amdgpu_file_to_fpriv(f, &fpriv);
@@ -69,13 +70,19 @@ void amdgpu_show_fdinfo(struct seq_file *m, struct file *f)
        dev = PCI_SLOT(adev->pdev->devfn);
        fn = PCI_FUNC(adev->pdev->devfn);
 
-       ret = amdgpu_bo_reserve(fpriv->vm.root.bo, false);
+       root = amdgpu_bo_ref(fpriv->vm.root.bo);
+       if (!root)
+               return;
+
+       ret = amdgpu_bo_reserve(root, false);
        if (ret) {
                DRM_ERROR("Fail to reserve bo\n");
                return;
        }
        amdgpu_vm_get_memory(&fpriv->vm, &vram_mem, &gtt_mem, &cpu_mem);
-       amdgpu_bo_unreserve(fpriv->vm.root.bo);
+       amdgpu_bo_unreserve(root);
+       amdgpu_bo_unref(&root);
+
        seq_printf(m, "pdev:\t%04x:%02x:%02x.%d\npasid:\t%u\n", domain, bus,
                        dev, fn, fpriv->vm.pasid);
        seq_printf(m, "vram mem:\t%llu kB\n", vram_mem/1024UL);
index 14499f0..8d682be 100644 (file)
@@ -552,6 +552,9 @@ void amdgpu_fence_driver_hw_fini(struct amdgpu_device *adev)
                if (!ring || !ring->fence_drv.initialized)
                        continue;
 
+               if (!ring->no_scheduler)
+                       drm_sched_stop(&ring->sched, NULL);
+
                /* You can't wait for HW to signal if it's gone */
                if (!drm_dev_is_unplugged(&adev->ddev))
                        r = amdgpu_fence_wait_empty(ring);
@@ -611,6 +614,11 @@ void amdgpu_fence_driver_hw_init(struct amdgpu_device *adev)
                if (!ring || !ring->fence_drv.initialized)
                        continue;
 
+               if (!ring->no_scheduler) {
+                       drm_sched_resubmit_jobs(&ring->sched);
+                       drm_sched_start(&ring->sched, true);
+               }
+
                /* enable the interrupt */
                if (ring->fence_drv.irq_src)
                        amdgpu_irq_get(adev, ring->fence_drv.irq_src,
index cb07cc3..d6aa032 100644 (file)
@@ -341,21 +341,18 @@ retry:
        r = amdgpu_gem_object_create(adev, size, args->in.alignment,
                                     initial_domain,
                                     flags, ttm_bo_type_device, resv, &gobj);
-       if (r) {
-               if (r != -ERESTARTSYS) {
-                       if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
-                               flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
-                               goto retry;
-                       }
+       if (r && r != -ERESTARTSYS) {
+               if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) {
+                       flags &= ~AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+                       goto retry;
+               }
 
-                       if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
-                               initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
-                               goto retry;
-                       }
-                       DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
-                                 size, initial_domain, args->in.alignment, r);
+               if (initial_domain == AMDGPU_GEM_DOMAIN_VRAM) {
+                       initial_domain |= AMDGPU_GEM_DOMAIN_GTT;
+                       goto retry;
                }
-               return r;
+               DRM_DEBUG("Failed to allocate GEM object (%llu, %d, %llu, %d)\n",
+                               size, initial_domain, args->in.alignment, r);
        }
 
        if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
index 5430003..675a72e 100644 (file)
@@ -118,7 +118,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_resource *res)
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
  *
  * Dummy, allocate the node but no space for it yet.
  */
@@ -182,7 +182,7 @@ err_out:
  * amdgpu_gtt_mgr_del - free ranges
  *
  * @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
  *
  * Free the allocated GTT again.
  */
index 23efdc6..9b41cb8 100644 (file)
@@ -469,10 +469,10 @@ psp_cmd_submit_buf(struct psp_context *psp,
         */
        if (!skip_unsupport && (psp->cmd_buf_mem->resp.status || !timeout) && !ras_intr) {
                if (ucode)
-                       DRM_WARN("failed to load ucode (%s) ",
-                                 amdgpu_ucode_name(ucode->ucode_id));
-               DRM_WARN("psp gfx command (%s) failed and response status is (0x%X)\n",
-                        psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id),
+                       DRM_WARN("failed to load ucode %s(0x%X) ",
+                                 amdgpu_ucode_name(ucode->ucode_id), ucode->ucode_id);
+               DRM_WARN("psp gfx command %s(0x%X) failed and response status is (0x%X)\n",
+                        psp_gfx_cmd_name(psp->cmd_buf_mem->cmd_id), psp->cmd_buf_mem->cmd_id,
                         psp->cmd_buf_mem->resp.status);
                if (!timeout) {
                        ret = -EINVAL;
index 9dc3b2d..dc44c94 100644 (file)
@@ -114,27 +114,24 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
                                  struct amdgpu_ras_eeprom_control *control)
 {
-       uint8_t ras_rom_i2c_slave_addr;
+       u8 i2c_addr;
 
        if (!control)
                return false;
 
-       control->i2c_address = 0;
-
-       if (amdgpu_atomfirmware_ras_rom_addr(adev, &ras_rom_i2c_slave_addr))
-       {
-               switch (ras_rom_i2c_slave_addr) {
-               case 0xA0:
-                       control->i2c_address = 0;
-                       return true;
-               case 0xA8:
-                       control->i2c_address = 0x40000;
-                       return true;
-               default:
-                       dev_warn(adev->dev, "RAS EEPROM I2C slave address %02x not supported",
-                                ras_rom_i2c_slave_addr);
-                       return false;
-               }
+       if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) {
+               /* The address given by VBIOS is an 8-bit, wire-format
+                * address, i.e. the most significant byte.
+                *
+                * Normalize it to a 19-bit EEPROM address. Remove the
+                * device type identifier and make it a 7-bit address;
+                * then make it a 19-bit EEPROM address. See top of
+                * amdgpu_eeprom.c.
+                */
+               i2c_addr = (i2c_addr & 0x0F) >> 1;
+               control->i2c_address = ((u32) i2c_addr) << 16;
+
+               return true;
        }
 
        switch (adev->asic_type) {
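
Editor's note: the arithmetic above replaces the removed 0xA0/0xA8 switch. The standalone check below (host-side C, not driver code) confirms that the two legacy wire addresses still map to the same 19-bit EEPROM addresses the old table hard-coded.

#include <stdint.h>
#include <stdio.h>

static uint32_t normalize_ras_eeprom_addr(uint8_t wire_addr)
{
        /* mask off the 1010b device-type nibble, drop the R/W bit */
        uint8_t block = (wire_addr & 0x0F) >> 1;

        /* place the block-select bits at 18:16 of the EEPROM address */
        return (uint32_t)block << 16;
}

int main(void)
{
        printf("0xA0 -> 0x%05x\n", (unsigned)normalize_ras_eeprom_addr(0xA0)); /* 0x00000 */
        printf("0xA8 -> 0x%05x\n", (unsigned)normalize_ras_eeprom_addr(0xA8)); /* 0x40000 */
        return 0;
}
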
index 2fd77c3..7b2b098 100644 (file)
@@ -361,7 +361,7 @@ static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
  * @man: TTM memory type manager
  * @tbo: TTM BO we need this range for
  * @place: placement flags and restrictions
- * @mem: the resulting mem object
+ * @res: the resulting mem object
  *
  * Allocate VRAM for the given BO.
  */
@@ -487,7 +487,7 @@ error_sub:
  * amdgpu_vram_mgr_del - free ranges
  *
  * @man: TTM memory type manager
- * @mem: TTM memory object
+ * @res: TTM memory object
  *
  * Free the allocated VRAM again.
  */
@@ -522,7 +522,7 @@ static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
  * amdgpu_vram_mgr_alloc_sgt - allocate and fill a sg table
  *
  * @adev: amdgpu device pointer
- * @mem: TTM memory object
+ * @res: TTM memory object
  * @offset: byte offset from the base of VRAM BO
  * @length: number of bytes to export in sg_table
  * @dev: the other device
index ff2307d..23b066b 100644 (file)
@@ -258,6 +258,8 @@ static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
        amdgpu_virt_fini_data_exchange(adev);
        atomic_set(&adev->in_gpu_reset, 1);
 
+       xgpu_ai_mailbox_trans_msg(adev, IDH_READY_TO_RESET, 0, 0, 0);
+
        do {
                if (xgpu_ai_mailbox_peek_msg(adev) == IDH_FLR_NOTIFICATION_CMPL)
                        goto flr_done;
index 5057263..bd3b231 100644 (file)
@@ -37,6 +37,7 @@ enum idh_request {
        IDH_REQ_GPU_RESET_ACCESS,
 
        IDH_LOG_VF_ERROR       = 200,
+       IDH_READY_TO_RESET      = 201,
 };
 
 enum idh_event {
index ba1d3ab..f50045c 100644 (file)
 #define mmRCC_DEV0_EPF0_STRAP0_ALDE                    0x0015
 #define mmRCC_DEV0_EPF0_STRAP0_ALDE_BASE_IDX           2
 
-#define mmBIF_DOORBELL_INT_CNTL_ALDE                   0x3878
+#define mmBIF_DOORBELL_INT_CNTL_ALDE                   0x00fe
 #define mmBIF_DOORBELL_INT_CNTL_ALDE_BASE_IDX          2
 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE__SHIFT  0x18
 #define BIF_DOORBELL_INT_CNTL_ALDE__DOORBELL_INTERRUPT_DISABLE_MASK    0x01000000L
 
+#define mmBIF_INTR_CNTL_ALDE                           0x0101
+#define mmBIF_INTR_CNTL_ALDE_BASE_IDX                  2
+
 static void nbio_v7_4_query_ras_error_count(struct amdgpu_device *adev,
                                        void *ras_error_status);
 
@@ -440,14 +443,23 @@ static int nbio_v7_4_set_ras_controller_irq_state(struct amdgpu_device *adev,
         */
        uint32_t bif_intr_cntl;
 
-       bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+       if (adev->asic_type == CHIP_ALDEBARAN)
+               bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+       else
+               bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
        if (state == AMDGPU_IRQ_STATE_ENABLE) {
                /* set interrupt vector select bit to 0 to select
                 * vetcor 1 for bare metal case */
                bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
                                              BIF_INTR_CNTL,
                                              RAS_INTR_VEC_SEL, 0);
-               WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+               if (adev->asic_type == CHIP_ALDEBARAN)
+                       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+               else
+                       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
        }
 
        return 0;
@@ -476,14 +488,22 @@ static int nbio_v7_4_set_ras_err_event_athub_irq_state(struct amdgpu_device *ade
         */
        uint32_t bif_intr_cntl;
 
-       bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+       if (adev->asic_type == CHIP_ALDEBARAN)
+               bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE);
+       else
+               bif_intr_cntl = RREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL);
+
        if (state == AMDGPU_IRQ_STATE_ENABLE) {
                /* set interrupt vector select bit to 0 to select
                 * vetcor 1 for bare metal case */
                bif_intr_cntl = REG_SET_FIELD(bif_intr_cntl,
                                              BIF_INTR_CNTL,
                                              RAS_INTR_VEC_SEL, 0);
-               WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
+
+               if (adev->asic_type == CHIP_ALDEBARAN)
+                       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL_ALDE, bif_intr_cntl);
+               else
+                       WREG32_SOC15(NBIO, 0, mmBIF_INTR_CNTL, bif_intr_cntl);
        }
 
        return 0;
index 42a35d9..fe9a7cc 100644 (file)
@@ -904,14 +904,7 @@ static bool vi_asic_supports_baco(struct amdgpu_device *adev)
        case CHIP_POLARIS11:
        case CHIP_POLARIS12:
        case CHIP_TOPAZ:
-               /* Disable BACO support for the specific polaris12 SKU temporarily */
-               if ((adev->pdev->device == 0x699F) &&
-                    (adev->pdev->revision == 0xC7) &&
-                    (adev->pdev->subsystem_vendor == 0x1028) &&
-                    (adev->pdev->subsystem_device == 0x0039))
-                       return false;
-               else
-                       return amdgpu_dpm_is_baco_supported(adev);
+               return amdgpu_dpm_is_baco_supported(adev);
        default:
                return false;
        }
index 491373f..9fc8021 100644 (file)
@@ -2484,7 +2484,8 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
        }
        if (!p->xnack_enabled) {
                pr_debug("XNACK not enabled for pasid 0x%x\n", pasid);
-               return -EFAULT;
+               r = -EFAULT;
+               goto out;
        }
        svms = &p->svms;
 
index 8167236..9b1fc54 100644 (file)
@@ -1200,7 +1200,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
        dc_hardware_init(adev->dm.dc);
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-       if (adev->apu_flags) {
+       if ((adev->flags & AMD_IS_APU) && (adev->asic_type >= CHIP_CARRIZO)) {
                struct dc_phy_addr_space_config pa_config;
 
                mmhub_read_system_context(adev, &pa_config);
index cd025c1..330edd6 100644 (file)
@@ -1561,7 +1561,7 @@ bool dc_link_dp_perform_link_training_skip_aux(
        struct dc_link *link,
        const struct dc_link_settings *link_setting)
 {
-       struct link_training_settings lt_settings;
+       struct link_training_settings lt_settings = {0};
 
        dp_decide_training_settings(
                        link,
@@ -1707,7 +1707,7 @@ enum link_training_result dc_link_dp_perform_link_training(
        bool skip_video_pattern)
 {
        enum link_training_result status = LINK_TRAINING_SUCCESS;
-       struct link_training_settings lt_settings;
+       struct link_training_settings lt_settings = {0};
        enum dp_link_encoding encoding =
                        dp_get_link_encoding_format(link_settings);
 
@@ -1923,7 +1923,7 @@ enum link_training_result dc_link_dp_sync_lt_attempt(
     struct dc_link_settings *link_settings,
     struct dc_link_training_overrides *lt_overrides)
 {
-       struct link_training_settings lt_settings;
+       struct link_training_settings lt_settings = {0};
        enum link_training_result lt_status = LINK_TRAINING_SUCCESS;
        enum dp_panel_mode panel_mode = DP_PANEL_MODE_DEFAULT;
        enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
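
Editor's note: all three hunks in this file switch an on-stack link_training_settings to an empty-brace initializer so members the decide/override helpers never write read as zero instead of stack garbage. A tiny sketch of the idiom, with illustrative names:

#include <linux/types.h>

struct example_settings {
        int lane_count;
        int link_rate;
        bool fec_enable;        /* may be left untouched on some paths */
};

static int example_configure(void)
{
        struct example_settings s = {0};        /* every member starts at zero */

        s.lane_count = 4;
        s.link_rate = 270;
        /* s.fec_enable is a well-defined false even though nothing set it */
        return s.fec_enable ? 1 : 0;
}
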
index dc7823d..dd38796 100644 (file)
@@ -510,8 +510,12 @@ static struct stream_encoder *dcn303_stream_encoder_create(enum engine_id eng_id
        vpg = dcn303_vpg_create(ctx, vpg_inst);
        afmt = dcn303_afmt_create(ctx, afmt_inst);
 
-       if (!enc1 || !vpg || !afmt)
+       if (!enc1 || !vpg || !afmt) {
+               kfree(enc1);
+               kfree(vpg);
+               kfree(afmt);
                return NULL;
+       }
 
        dcn30_dio_stream_encoder_construct(enc1, ctx, ctx->dc_bios, eng_id, vpg, afmt, &stream_enc_regs[eng_id],
                        &se_shift, &se_mask);
index 7db268d..3b37213 100644 (file)
@@ -109,7 +109,7 @@ bool dcn31_is_panel_backlight_on(struct panel_cntl *panel_cntl)
        union dmub_rb_cmd cmd;
 
        if (!dcn31_query_backlight_info(panel_cntl, &cmd))
-               return 0;
+               return false;
 
        return cmd.panel_cntl.data.is_backlight_on;
 }
@@ -119,7 +119,7 @@ bool dcn31_is_panel_powered_on(struct panel_cntl *panel_cntl)
        union dmub_rb_cmd cmd;
 
        if (!dcn31_query_backlight_info(panel_cntl, &cmd))
-               return 0;
+               return false;
 
        return cmd.panel_cntl.data.is_powered_on;
 }
index fbed530..63bbdf8 100644 (file)
@@ -2641,7 +2641,7 @@ static void dml20v2_DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndP
                for (k = 0; k < mode_lib->vba.NumberOfActivePlanes; ++k) {
                        if (mode_lib->vba.PrefetchMode[mode_lib->vba.VoltageLevel][mode_lib->vba.maxMpcComb] == 0) {
                                if (mode_lib->vba.DRAMClockChangeWatermark >
-                               dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
+                                       dml_max(mode_lib->vba.StutterEnterPlusExitWatermark, mode_lib->vba.UrgentWatermark))
                                        mode_lib->vba.MinTTUVBlank[k] += 25;
                        }
                }
index 2d55627..249cb0a 100644 (file)
@@ -2005,10 +2005,10 @@ static int ss_bias_attr_update(struct amdgpu_device *adev, struct amdgpu_device_
 static struct amdgpu_device_attr amdgpu_device_attrs[] = {
        AMDGPU_DEVICE_ATTR_RW(power_dpm_state,                          ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(power_dpm_force_performance_level,        ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
-       AMDGPU_DEVICE_ATTR_RO(pp_num_states,                            ATTR_FLAG_BASIC),
-       AMDGPU_DEVICE_ATTR_RO(pp_cur_state,                             ATTR_FLAG_BASIC),
-       AMDGPU_DEVICE_ATTR_RW(pp_force_state,                           ATTR_FLAG_BASIC),
-       AMDGPU_DEVICE_ATTR_RW(pp_table,                                 ATTR_FLAG_BASIC),
+       AMDGPU_DEVICE_ATTR_RO(pp_num_states,                            ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+       AMDGPU_DEVICE_ATTR_RO(pp_cur_state,                             ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+       AMDGPU_DEVICE_ATTR_RW(pp_force_state,                           ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
+       AMDGPU_DEVICE_ATTR_RW(pp_table,                                 ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(pp_dpm_sclk,                              ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk,                              ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
        AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk,                            ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF),
index 715b422..8156729 100644 (file)
@@ -1335,6 +1335,30 @@ enum smu_cmn2asic_mapping_type {
 #define WORKLOAD_MAP(profile, workload) \
        [profile] = {1, (workload)}
 
+/**
+ * smu_memcpy_trailing - Copy the end of one structure into the middle of another
+ *
+ * @dst: Pointer to destination struct
+ * @first_dst_member: The member name in @dst where the overwrite begins
+ * @last_dst_member: The member name in @dst where the overwrite ends after
+ * @src: Pointer to the source struct
+ * @first_src_member: The member name in @src where the copy begins
+ *
+ */
+#define smu_memcpy_trailing(dst, first_dst_member, last_dst_member,       \
+                           src, first_src_member)                         \
+({                                                                        \
+       size_t __src_offset = offsetof(typeof(*(src)), first_src_member);  \
+       size_t __src_size = sizeof(*(src)) - __src_offset;                 \
+       size_t __dst_offset = offsetof(typeof(*(dst)), first_dst_member);  \
+       size_t __dst_size = offsetofend(typeof(*(dst)), last_dst_member) - \
+                           __dst_offset;                                  \
+       BUILD_BUG_ON(__src_size != __dst_size);                            \
+       __builtin_memcpy((u8 *)(dst) + __dst_offset,                       \
+                        (u8 *)(src) + __src_offset,                       \
+                        __dst_size);                                      \
+})
+
 #if !defined(SWSMU_CODE_LAYER_L2) && !defined(SWSMU_CODE_LAYER_L3) && !defined(SWSMU_CODE_LAYER_L4)
 int smu_get_power_limit(void *handle,
                        uint32_t *limit,
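
Editor's note: a hedged illustration of the new helper with made-up structures (not real SMU tables). The macro copies from @first_src_member to the end of *src over the @first_dst_member..@last_dst_member span of *dst, and the BUILD_BUG_ON() turns any size mismatch into a build failure.

#include <linux/types.h>

struct src_tbl { u32 hdr; u32 b; u32 c; u32 d; };
struct dst_tbl { u64 other; u32 x; u32 y; u32 z; u32 reserved; };

static void example_copy(struct dst_tbl *dst, struct src_tbl *src)
{
        /* copies src->b..src->d (12 bytes) over dst->x..dst->z (12 bytes) */
        smu_memcpy_trailing(dst, x, z, src, b);
}
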
index 465ff8d..e7803ce 100644 (file)
@@ -27,6 +27,9 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 #include <asm/div64.h>
+#if IS_ENABLED(CONFIG_X86_64)
+#include <asm/intel-family.h>
+#endif
 #include <drm/amdgpu_drm.h>
 #include "ppatomctrl.h"
 #include "atombios.h"
@@ -1733,6 +1736,17 @@ static int smu7_disable_dpm_tasks(struct pp_hwmgr *hwmgr)
        return result;
 }
 
+static bool intel_core_rkl_chk(void)
+{
+#if IS_ENABLED(CONFIG_X86_64)
+       struct cpuinfo_x86 *c = &cpu_data(0);
+
+       return (c->x86 == 6 && c->x86_model == INTEL_FAM6_ROCKETLAKE);
+#else
+       return false;
+#endif
+}
+
 static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 {
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -1758,7 +1772,8 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 
        data->mclk_dpm_key_disabled = hwmgr->feature_mask & PP_MCLK_DPM_MASK ? false : true;
        data->sclk_dpm_key_disabled = hwmgr->feature_mask & PP_SCLK_DPM_MASK ? false : true;
-       data->pcie_dpm_key_disabled = hwmgr->feature_mask & PP_PCIE_DPM_MASK ? false : true;
+       data->pcie_dpm_key_disabled =
+               intel_core_rkl_chk() || !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
        /* need to set voltage control types before EVV patching */
        data->voltage_control = SMU7_VOLTAGE_CONTROL_NONE;
        data->vddci_control = SMU7_VOLTAGE_CONTROL_NONE;
index 273df66..e343cc2 100644 (file)
@@ -483,10 +483,8 @@ static int arcturus_append_powerplay_table(struct smu_context *smu)
 
        if ((smc_dpm_table->table_header.format_revision == 4) &&
            (smc_dpm_table->table_header.content_revision == 6))
-               memcpy(&smc_pptable->MaxVoltageStepGfx,
-                      &smc_dpm_table->maxvoltagestepgfx,
-                      sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_6, maxvoltagestepgfx));
-
+               smu_memcpy_trailing(smc_pptable, MaxVoltageStepGfx, BoardReserved,
+                                   smc_dpm_table, maxvoltagestepgfx);
        return 0;
 }
 
index f966817..a5fc5d7 100644 (file)
@@ -431,16 +431,16 @@ static int navi10_append_powerplay_table(struct smu_context *smu)
 
        switch (smc_dpm_table->table_header.content_revision) {
        case 5: /* nv10 and nv14 */
-               memcpy(smc_pptable->I2cControllers, smc_dpm_table->I2cControllers,
-                       sizeof(*smc_dpm_table) - sizeof(smc_dpm_table->table_header));
+               smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved,
+                                   smc_dpm_table, I2cControllers);
                break;
        case 7: /* nv12 */
                ret = amdgpu_atombios_get_data_table(adev, index, NULL, NULL, NULL,
                                              (uint8_t **)&smc_dpm_table_v4_7);
                if (ret)
                        return ret;
-               memcpy(smc_pptable->I2cControllers, smc_dpm_table_v4_7->I2cControllers,
-                       sizeof(*smc_dpm_table_v4_7) - sizeof(smc_dpm_table_v4_7->table_header));
+               smu_memcpy_trailing(smc_pptable, I2cControllers, BoardReserved,
+                                   smc_dpm_table_v4_7, I2cControllers);
                break;
        default:
                dev_err(smu->adev->dev, "smc_dpm_info with unsupported content revision %d!\n",
index 6eb50b0..3a34214 100644 (file)
@@ -1869,7 +1869,7 @@ static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TAB
                } else {
                        if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
                                dev_err(smu->adev->dev,
-                                       "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+                                       "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
                                        smu->gfx_actual_hard_min_freq,
                                        smu->gfx_actual_soft_max_freq);
                                return -EINVAL;
index b391380..5aa175e 100644 (file)
@@ -426,7 +426,7 @@ static int renoir_od_edit_dpm_table(struct smu_context *smu,
                } else {
                        if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
                                dev_err(smu->adev->dev,
-                                       "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+                                       "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
                                        smu->gfx_actual_hard_min_freq,
                                        smu->gfx_actual_soft_max_freq);
                                return -EINVAL;
index ec8c30d..ab65202 100644 (file)
@@ -409,9 +409,8 @@ static int aldebaran_append_powerplay_table(struct smu_context *smu)
 
        if ((smc_dpm_table->table_header.format_revision == 4) &&
            (smc_dpm_table->table_header.content_revision == 10))
-               memcpy(&smc_pptable->GfxMaxCurrent,
-                      &smc_dpm_table->GfxMaxCurrent,
-                      sizeof(*smc_dpm_table) - offsetof(struct atom_smc_dpm_info_v4_10, GfxMaxCurrent));
+               smu_memcpy_trailing(smc_pptable, GfxMaxCurrent, reserved,
+                                   smc_dpm_table, GfxMaxCurrent);
        return 0;
 }
 
index 0f17c25..627ba2e 100644 (file)
@@ -731,7 +731,7 @@ static int yellow_carp_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM
                } else {
                        if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) {
                                dev_err(smu->adev->dev,
-                                       "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
+                                       "The setting minimum sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n",
                                        smu->gfx_actual_hard_min_freq,
                                        smu->gfx_actual_soft_max_freq);
                                return -EINVAL;
index 51dbe0e..d2969f6 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef INTEL_GT_REQUESTS_H
 #define INTEL_GT_REQUESTS_H
 
-#include <stddef.h>
+#include <linux/stddef.h>
 
 struct intel_engine_cs;
 struct intel_gt;
index 7c903cf..e9ae22b 100644 (file)
@@ -124,6 +124,7 @@ static int mgag200_pixpll_compute_g200se_00(struct mgag200_pll *pixpll, long clo
        unsigned int computed;
 
        m = n = p = s = 0;
+       delta = 0xffffffff;
        permitteddelta = clock * 5 / 1000;
 
        for (testp = 8; testp > 0; testp /= 2) {
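
Editor's note: the one-line fix seeds delta before the search loop; previously the first "is this candidate closer?" comparison read an uninitialized value. A self-contained sketch of the pattern (pick_best_divider() and its callback are illustrative, not the driver's code):

static unsigned int pick_best_divider(unsigned int target,
                                      unsigned int (*candidate)(unsigned int m))
{
        unsigned int best_delta = 0xffffffff;   /* start at "infinity" */
        unsigned int best_m = 0, m, computed, delta;

        for (m = 1; m < 64; m++) {
                computed = candidate(m);
                delta = computed > target ? computed - target : target - computed;
                if (delta < best_delta) {       /* well-defined on the first pass */
                        best_delta = delta;
                        best_m = m;
                }
        }

        return best_m;
}
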
index 0da5b31..dfe5f1d 100644 (file)
@@ -58,25 +58,16 @@ static int write_cmd(struct panfrost_device *pfdev, u32 as_nr, u32 cmd)
 }
 
 static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
-                       u64 iova, size_t size)
+                       u64 iova, u64 size)
 {
        u8 region_width;
        u64 region = iova & PAGE_MASK;
-       /*
-        * fls returns:
-        * 1 .. 32
-        *
-        * 10 + fls(num_pages)
-        * results in the range (11 .. 42)
-        */
-
-       size = round_up(size, PAGE_SIZE);
 
-       region_width = 10 + fls(size >> PAGE_SHIFT);
-       if ((size >> PAGE_SHIFT) != (1ul << (region_width - 11))) {
-               /* not pow2, so must go up to the next pow2 */
-               region_width += 1;
-       }
+       /* The size is encoded as ceil(log2) minus(1), which may be calculated
+        * with fls. The size must be clamped to hardware bounds.
+        */
+       size = max_t(u64, size, AS_LOCK_REGION_MIN_SIZE);
+       region_width = fls64(size - 1) - 1;
        region |= region_width;
 
        /* Lock the region that needs to be updated */
@@ -87,7 +78,7 @@ static void lock_region(struct panfrost_device *pfdev, u32 as_nr,
 
 
 static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
-                                     u64 iova, size_t size, u32 op)
+                                     u64 iova, u64 size, u32 op)
 {
        if (as_nr < 0)
                return 0;
@@ -104,7 +95,7 @@ static int mmu_hw_do_operation_locked(struct panfrost_device *pfdev, int as_nr,
 
 static int mmu_hw_do_operation(struct panfrost_device *pfdev,
                               struct panfrost_mmu *mmu,
-                              u64 iova, size_t size, u32 op)
+                              u64 iova, u64 size, u32 op)
 {
        int ret;
 
@@ -121,7 +112,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
        u64 transtab = cfg->arm_mali_lpae_cfg.transtab;
        u64 memattr = cfg->arm_mali_lpae_cfg.memattr;
 
-       mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+       mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
 
        mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), transtab & 0xffffffffUL);
        mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), transtab >> 32);
@@ -137,7 +128,7 @@ static void panfrost_mmu_enable(struct panfrost_device *pfdev, struct panfrost_m
 
 static void panfrost_mmu_disable(struct panfrost_device *pfdev, u32 as_nr)
 {
-       mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0UL, AS_COMMAND_FLUSH_MEM);
+       mmu_hw_do_operation_locked(pfdev, as_nr, 0, ~0ULL, AS_COMMAND_FLUSH_MEM);
 
        mmu_write(pfdev, AS_TRANSTAB_LO(as_nr), 0);
        mmu_write(pfdev, AS_TRANSTAB_HI(as_nr), 0);
@@ -251,7 +242,7 @@ static size_t get_pgsize(u64 addr, size_t size)
 
 static void panfrost_mmu_flush_range(struct panfrost_device *pfdev,
                                     struct panfrost_mmu *mmu,
-                                    u64 iova, size_t size)
+                                    u64 iova, u64 size)
 {
        if (mmu->as < 0)
                return;
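
Editor's note: the new encoding computes region_width as ceil(log2(size)) - 1 via fls64() after clamping size to the hardware minimum of 1 << 15. The standalone check below is host-side C with a portable stand-in for fls64(), not driver code.

#include <stdint.h>
#include <stdio.h>

/* Portable stand-in for the kernel's fls64(): 1-based index of the
 * highest set bit, 0 when the input is 0.
 */
static int fls64_approx(uint64_t x)
{
        int bit = 0;

        while (x) {
                bit++;
                x >>= 1;
        }
        return bit;
}

int main(void)
{
        const uint64_t min_size = 1ULL << 15;   /* AS_LOCK_REGION_MIN_SIZE */
        uint64_t sizes[] = { 1, 4096, 32768, 32769, 1ULL << 20 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                uint64_t size = sizes[i] > min_size ? sizes[i] : min_size;
                int region_width = fls64_approx(size - 1) - 1;

                printf("requested %llu -> region_width %d\n",
                       (unsigned long long)sizes[i], region_width);
        }
        return 0;
}
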
index 1940ff8..6c5a11e 100644 (file)
 #define AS_FAULTSTATUS_ACCESS_TYPE_READ                (0x2 << 8)
 #define AS_FAULTSTATUS_ACCESS_TYPE_WRITE       (0x3 << 8)
 
+#define AS_LOCK_REGION_MIN_SIZE                 (1ULL << 15)
+
 #define gpu_write(dev, reg, data) writel(data, dev->iomem + reg)
 #define gpu_read(dev, reg) readl(dev->iomem + reg)
 
index ea4add2..bb9e02c 100644 (file)
@@ -1160,9 +1160,9 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx,
        }
 
        if (bo->deleted) {
-               ttm_bo_cleanup_refs(bo, false, false, locked);
+               ret = ttm_bo_cleanup_refs(bo, false, false, locked);
                ttm_bo_put(bo);
-               return 0;
+               return ret == -EBUSY ? -ENOSPC : ret;
        }
 
        ttm_bo_del_from_lru(bo);
@@ -1216,7 +1216,7 @@ out:
        if (locked)
                dma_resv_unlock(bo->base.resv);
        ttm_bo_put(bo);
-       return ret;
+       return ret == -EBUSY ? -ENOSPC : ret;
 }
 
 void ttm_bo_tt_destroy(struct ttm_buffer_object *bo)
index 763fa6f..1c5ffe2 100644 (file)
@@ -143,7 +143,6 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
        struct ttm_resource *src_mem = bo->resource;
        struct ttm_resource_manager *src_man =
                ttm_manager_type(bdev, src_mem->mem_type);
-       struct ttm_resource src_copy = *src_mem;
        union {
                struct ttm_kmap_iter_tt tt;
                struct ttm_kmap_iter_linear_io io;
@@ -173,11 +172,11 @@ int ttm_bo_move_memcpy(struct ttm_buffer_object *bo,
        }
 
        ttm_move_memcpy(bo, dst_mem->num_pages, dst_iter, src_iter);
-       src_copy = *src_mem;
-       ttm_bo_move_sync_cleanup(bo, dst_mem);
 
        if (!src_iter->ops->maps_tt)
-               ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, &src_copy);
+               ttm_kmap_iter_linear_io_fini(&_src_iter.io, bdev, src_mem);
+       ttm_bo_move_sync_cleanup(bo, dst_mem);
+
 out_src_iter:
        if (!dst_iter->ops->maps_tt)
                ttm_kmap_iter_linear_io_fini(&_dst_iter.io, bdev, dst_mem);
index 24031a8..d5cd8b5 100644 (file)
@@ -32,7 +32,6 @@
 #define pr_fmt(fmt) "[TTM] " fmt
 
 #include <linux/sched.h>
-#include <linux/pagemap.h>
 #include <linux/shmem_fs.h>
 #include <linux/file.h>
 #include <drm/drm_cache.h>
index b7dc32a..4a11150 100644 (file)
@@ -1462,7 +1462,7 @@ static const struct hdmi_codec_ops vc4_hdmi_codec_ops = {
        .audio_startup = vc4_hdmi_audio_startup,
 };
 
-struct hdmi_codec_pdata vc4_hdmi_codec_pdata = {
+static struct hdmi_codec_pdata vc4_hdmi_codec_pdata = {
        .ops = &vc4_hdmi_codec_ops,
        .max_i2s_channels = 8,
        .i2s = 1,
index 3048862..408dfbc 100644 (file)
@@ -1309,7 +1309,7 @@ out:
 static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
                                     struct bnxt_re_pd *pd)
 {
-       struct bnxt_re_sqp_entries *sqp_tbl = NULL;
+       struct bnxt_re_sqp_entries *sqp_tbl;
        struct bnxt_re_dev *rdev;
        struct bnxt_re_qp *sqp;
        struct bnxt_re_ah *sah;
@@ -1317,7 +1317,7 @@ static int bnxt_re_create_shadow_gsi(struct bnxt_re_qp *qp,
 
        rdev = qp->rdev;
        /* Create a shadow QP to handle the QP1 traffic */
-       sqp_tbl = kzalloc(sizeof(*sqp_tbl) * BNXT_RE_MAX_GSI_SQP_ENTRIES,
+       sqp_tbl = kcalloc(BNXT_RE_MAX_GSI_SQP_ENTRIES, sizeof(*sqp_tbl),
                          GFP_KERNEL);
        if (!sqp_tbl)
                return -ENOMEM;
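
Editor's note: kcalloc() differs from the replaced kzalloc(n * size) in that it checks the multiplication for overflow and returns NULL instead of handing back a silently truncated buffer. Minimal sketch; struct example_entry is illustrative.

#include <linux/slab.h>
#include <linux/types.h>

struct example_entry {
        u64 id;
};

static struct example_entry *example_alloc_table(size_t n)
{
        /* zeroed, overflow-checked n * sizeof(entry) allocation */
        return kcalloc(n, sizeof(struct example_entry), GFP_KERNEL);
}
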
index d9b5bbb..8302469 100644 (file)
@@ -488,7 +488,7 @@ struct hfi1_ctxt_hist {
        atomic_t data[255];
 };
 
-struct hfi1_ctxt_hist hist = {
+static struct hfi1_ctxt_hist hist = {
        .count = ATOMIC_INIT(0)
 };
 
index a520ac8..3be36eb 100644 (file)
@@ -995,7 +995,7 @@ static struct mlx5_ib_mr *alloc_cacheable_mr(struct ib_pd *pd,
 static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 {
        const size_t xlt_chunk_align =
-               MLX5_UMR_MTT_ALIGNMENT / sizeof(ent_size);
+               MLX5_UMR_MTT_ALIGNMENT / ent_size;
        size_t size;
        void *res = NULL;
 
@@ -1024,7 +1024,7 @@ static void *mlx5_ib_alloc_xlt(size_t *nents, size_t ent_size, gfp_t gfp_mask)
 
        if (size > MLX5_SPARE_UMR_CHUNK) {
                size = MLX5_SPARE_UMR_CHUNK;
-               *nents = get_order(size) / ent_size;
+               *nents = size / ent_size;
                res = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
                                               get_order(size));
                if (res)
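
Editor's note: both hunks correct a division by the wrong quantity: sizeof(ent_size) is the size of the size_t variable itself (typically 8), not the entry size stored in it, and get_order(size) is a page order rather than a byte count. A tiny host-side illustration of the first mistake (not driver code):

#include <stddef.h>
#include <stdio.h>

int main(void)
{
        size_t ent_size = 64;   /* bytes per entry */
        size_t chunk = 4096;    /* bytes available */

        /* Wrong: divides by sizeof(size_t), usually 8 */
        printf("buggy:   %zu entries\n", chunk / sizeof(ent_size));

        /* Right: divide by the value the variable holds */
        printf("correct: %zu entries\n", chunk / ent_size);
        return 0;
}
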
index d57e49d..452e235 100644 (file)
@@ -403,9 +403,11 @@ static ssize_t diagc_attr_store(struct ib_device *ibdev, u32 port_num,
 }
 
 #define QIB_DIAGC_ATTR(N)                                                      \
+       static_assert(&((struct qib_ibport *)0)->rvp.n_##N != (u64 *)NULL);    \
        static struct qib_diagc_attr qib_diagc_attr_##N = {                    \
                .attr = __ATTR(N, 0664, diagc_attr_show, diagc_attr_store),    \
-               .counter = &((struct qib_ibport *)0)->rvp.n_##N - (u64 *)0,    \
+               .counter =                                                     \
+                       offsetof(struct qib_ibport, rvp.n_##N) / sizeof(u64)   \
        }
 
 QIB_DIAGC_ATTR(rc_resends);
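
Editor's note: the macro now derives each counter's u64 slot with offsetof() instead of subtracting pointers based at NULL, which is undefined behaviour that newer compilers flag. A standalone sketch with a made-up struct:

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct example_port {
        uint64_t n_rc_resends;
        uint64_t n_seq_naks;
        uint64_t n_rdma_seq;
};

int main(void)
{
        /* u64 slot index of a member, no arithmetic on a NULL base needed */
        size_t idx = offsetof(struct example_port, n_seq_naks) / sizeof(uint64_t);

        printf("n_seq_naks is u64 slot %zu\n", idx);    /* prints 1 */
        return 0;
}
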
index 8ad8618..124c41a 100644 (file)
@@ -82,7 +82,7 @@ config IOMMU_DEBUGFS
 choice
        prompt "IOMMU default domain type"
        depends on IOMMU_API
-       default IOMMU_DEFAULT_DMA_LAZY if AMD_IOMMU || INTEL_IOMMU
+       default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64
        default IOMMU_DEFAULT_DMA_STRICT
        help
          Choose the type of IOMMU domain used to manage DMA API usage by
index bdcf167..2a822b2 100644 (file)
@@ -297,6 +297,22 @@ int amd_iommu_get_num_iommus(void)
        return amd_iommus_present;
 }
 
+#ifdef CONFIG_IRQ_REMAP
+static bool check_feature_on_all_iommus(u64 mask)
+{
+       bool ret = false;
+       struct amd_iommu *iommu;
+
+       for_each_iommu(iommu) {
+               ret = iommu_feature(iommu, mask);
+               if (!ret)
+                       return false;
+       }
+
+       return true;
+}
+#endif
+
 /*
  * For IVHD type 0x11/0x40, EFR is also available via IVHD.
  * Default to IVHD EFR since it is available sooner
@@ -813,9 +829,9 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
        return 0;
 }
 
-#ifdef CONFIG_IRQ_REMAP
 static int iommu_init_ga_log(struct amd_iommu *iommu)
 {
+#ifdef CONFIG_IRQ_REMAP
        u64 entry;
 
        if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
@@ -845,25 +861,9 @@ static int iommu_init_ga_log(struct amd_iommu *iommu)
 err_out:
        free_ga_log(iommu);
        return -EINVAL;
-}
-#endif /* CONFIG_IRQ_REMAP */
-
-static int iommu_init_ga(struct amd_iommu *iommu)
-{
-       int ret = 0;
-
-#ifdef CONFIG_IRQ_REMAP
-       /* Note: We have already checked GASup from IVRS table.
-        *       Now, we need to make sure that GAMSup is set.
-        */
-       if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
-           !iommu_feature(iommu, FEATURE_GAM_VAPIC))
-               amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
-
-       ret = iommu_init_ga_log(iommu);
+#else
+       return 0;
 #endif /* CONFIG_IRQ_REMAP */
-
-       return ret;
 }
 
 static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
@@ -1845,7 +1845,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
        if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
                return -ENOMEM;
 
-       ret = iommu_init_ga(iommu);
+       ret = iommu_init_ga_log(iommu);
        if (ret)
                return ret;
 
@@ -2479,6 +2479,14 @@ static void early_enable_iommus(void)
        }
 
 #ifdef CONFIG_IRQ_REMAP
+       /*
+        * Note: We have already checked GASup from IVRS table.
+        *       Now, we need to make sure that GAMSup is set.
+        */
+       if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
+           !check_feature_on_all_iommus(FEATURE_GAM_VAPIC))
+               amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
+
        if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
                amd_iommu_irq_ops.capability |= (1 << IRQ_POSTING_CAP);
 #endif
index 2014fe8..0c22878 100644 (file)
@@ -514,9 +514,6 @@ static void load_pasid(struct mm_struct *mm, u32 pasid)
 {
        mutex_lock(&mm->context.lock);
 
-       /* Synchronize with READ_ONCE in update_pasid(). */
-       smp_store_release(&mm->pasid, pasid);
-
        /* Update PASID MSR on all CPUs running the mm's tasks. */
        on_each_cpu_mask(mm_cpumask(mm), _load_pasid, NULL, true);
 
@@ -792,7 +789,19 @@ prq_retry:
                goto prq_retry;
        }
 
+       /*
+        * A work in IO page fault workqueue may try to lock pasid_mutex now.
+        * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
+        * all works in the workqueue to finish may cause deadlock.
+        *
+        * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
+        * Unlock it to allow the works to be handled while waiting for
+        * them to finish.
+        */
+       lockdep_assert_held(&pasid_mutex);
+       mutex_unlock(&pasid_mutex);
        iopf_queue_flush_dev(dev);
+       mutex_lock(&pasid_mutex);
 
        /*
         * Perform steps described in VT-d spec CH7.10 to drain page
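
Editor's note: the added comment and unlock/relock pair express a general rule: never wait for work items that may need the lock you hold. Below is a hedged sketch of the pattern with illustrative names (example_lock, example_wq are assumptions, not the driver's symbols).

#include <linux/mutex.h>
#include <linux/workqueue.h>

static DEFINE_MUTEX(example_lock);
static struct workqueue_struct *example_wq;

static void example_flush_under_lock(void)
{
        lockdep_assert_held(&example_lock);

        /* Workers may take example_lock, so release it before waiting for
         * them and re-acquire it afterwards; callers must tolerate the
         * protected state changing across the gap.
         */
        mutex_unlock(&example_lock);
        flush_workqueue(example_wq);
        mutex_lock(&example_lock);
}
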
index b03eabc..2111daa 100644 (file)
@@ -809,14 +809,9 @@ EXPORT_SYMBOL_GPL(dm_table_set_type);
 int device_not_dax_capable(struct dm_target *ti, struct dm_dev *dev,
                        sector_t start, sector_t len, void *data)
 {
-       int blocksize = *(int *) data, id;
-       bool rc;
+       int blocksize = *(int *) data;
 
-       id = dax_read_lock();
-       rc = !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
-       dax_read_unlock(id);
-
-       return rc;
+       return !dax_supported(dev->dax_dev, dev->bdev, blocksize, start, len);
 }
 
 /* Check devices support synchronous DAX */
index 84e9145..a011d09 100644 (file)
@@ -654,7 +654,7 @@ static int open_table_device(struct table_device *td, dev_t dev,
        }
 
        td->dm_dev.bdev = bdev;
-       td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+       td->dm_dev.dax_dev = fs_dax_get_by_bdev(bdev);
        return 0;
 }
 
index 5d8b482..6ebe3c7 100644 (file)
@@ -10,4 +10,5 @@ HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
                common/asid.o common/habanalabs_ioctl.o \
                common/command_buffer.o common/hw_queue.o common/irq.o \
                common/sysfs.o common/hwmon.o common/memory.o \
-               common/command_submission.o common/firmware_if.o
+               common/command_submission.o common/firmware_if.o \
+               common/state_dump.o
index 719168c..8132a84 100644 (file)
@@ -314,8 +314,6 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
 
        spin_lock(&mgr->cb_lock);
        rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_ATOMIC);
-       if (rc < 0)
-               rc = idr_alloc(&mgr->cb_handles, cb, 1, 0, GFP_KERNEL);
        spin_unlock(&mgr->cb_lock);
 
        if (rc < 0) {
@@ -552,7 +550,7 @@ int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
 
        vma->vm_private_data = cb;
 
-       rc = hdev->asic_funcs->cb_mmap(hdev, vma, cb->kernel_address,
+       rc = hdev->asic_funcs->mmap(hdev, vma, cb->kernel_address,
                                        cb->bus_address, cb->size);
        if (rc) {
                spin_lock(&cb->lock);
index 80c60fb..7b0516c 100644 (file)
@@ -38,7 +38,11 @@ static void hl_sob_reset(struct kref *ref)
                                                        kref);
        struct hl_device *hdev = hw_sob->hdev;
 
+       dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id);
+
        hdev->asic_funcs->reset_sob(hdev, hw_sob);
+
+       hw_sob->need_reset = false;
 }
 
 void hl_sob_reset_error(struct kref *ref)
@@ -52,6 +56,24 @@ void hl_sob_reset_error(struct kref *ref)
                hw_sob->q_idx, hw_sob->sob_id);
 }
 
+void hw_sob_put(struct hl_hw_sob *hw_sob)
+{
+       if (hw_sob)
+               kref_put(&hw_sob->kref, hl_sob_reset);
+}
+
+static void hw_sob_put_err(struct hl_hw_sob *hw_sob)
+{
+       if (hw_sob)
+               kref_put(&hw_sob->kref, hl_sob_reset_error);
+}
+
+void hw_sob_get(struct hl_hw_sob *hw_sob)
+{
+       if (hw_sob)
+               kref_get(&hw_sob->kref);
+}
+
 /**
  * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet
  * @sob_base: sob base id
@@ -84,76 +106,29 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask)
        return 0;
 }
 
-static void sob_reset_work(struct work_struct *work)
-{
-       struct hl_cs_compl *hl_cs_cmpl =
-               container_of(work, struct hl_cs_compl, sob_reset_work);
-       struct hl_device *hdev = hl_cs_cmpl->hdev;
-
-       /*
-        * A signal CS can get completion while the corresponding wait
-        * for signal CS is on its way to the PQ. The wait for signal CS
-        * will get stuck if the signal CS incremented the SOB to its
-        * max value and there are no pending (submitted) waits on this
-        * SOB.
-        * We do the following to void this situation:
-        * 1. The wait for signal CS must get a ref for the signal CS as
-        *    soon as possible in cs_ioctl_signal_wait() and put it
-        *    before being submitted to the PQ but after it incremented
-        *    the SOB refcnt in init_signal_wait_cs().
-        * 2. Signal/Wait for signal CS will decrement the SOB refcnt
-        *    here.
-        * These two measures guarantee that the wait for signal CS will
-        * reset the SOB upon completion rather than the signal CS and
-        * hence the above scenario is avoided.
-        */
-       kref_put(&hl_cs_cmpl->hw_sob->kref, hl_sob_reset);
-
-       if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
-               hdev->asic_funcs->reset_sob_group(hdev,
-                               hl_cs_cmpl->sob_group);
-
-       kfree(hl_cs_cmpl);
-}
-
 static void hl_fence_release(struct kref *kref)
 {
        struct hl_fence *fence =
                container_of(kref, struct hl_fence, refcount);
        struct hl_cs_compl *hl_cs_cmpl =
                container_of(fence, struct hl_cs_compl, base_fence);
-       struct hl_device *hdev = hl_cs_cmpl->hdev;
-
-       /* EBUSY means the CS was never submitted and hence we don't have
-        * an attached hw_sob object that we should handle here
-        */
-       if (fence->error == -EBUSY)
-               goto free;
-
-       if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
-               (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
-               (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)) {
-
-               dev_dbg(hdev->dev,
-                       "CS 0x%llx type %d finished, sob_id: %d, sob_val: 0x%x\n",
-                       hl_cs_cmpl->cs_seq,
-                       hl_cs_cmpl->type,
-                       hl_cs_cmpl->hw_sob->sob_id,
-                       hl_cs_cmpl->sob_val);
-
-               queue_work(hdev->sob_reset_wq, &hl_cs_cmpl->sob_reset_work);
 
-               return;
-       }
-
-free:
        kfree(hl_cs_cmpl);
 }
 
 void hl_fence_put(struct hl_fence *fence)
 {
-       if (fence)
-               kref_put(&fence->refcount, hl_fence_release);
+       if (IS_ERR_OR_NULL(fence))
+               return;
+       kref_put(&fence->refcount, hl_fence_release);
+}
+
+void hl_fences_put(struct hl_fence **fence, int len)
+{
+       int i;
+
+       for (i = 0; i < len; i++, fence++)
+               hl_fence_put(*fence);
 }
 
 void hl_fence_get(struct hl_fence *fence)
@@ -473,11 +448,139 @@ static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
        spin_unlock(&hdev->cs_mirror_lock);
 }
 
+/*
+ * force_complete_multi_cs - complete all contexts that wait on multi-CS
+ *
+ * @hdev: pointer to habanalabs device structure
+ */
+static void force_complete_multi_cs(struct hl_device *hdev)
+{
+       int i;
+
+       for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+               struct multi_cs_completion *mcs_compl;
+
+               mcs_compl = &hdev->multi_cs_completion[i];
+
+               spin_lock(&mcs_compl->lock);
+
+               if (!mcs_compl->used) {
+                       spin_unlock(&mcs_compl->lock);
+                       continue;
+               }
+
+               /* when calling force complete no context should be waiting on
+                * multi-CS.
+                * We call the function as a protection for such a case,
+                * to free any pending context and print an error message.
+                */
+               dev_err(hdev->dev,
+                               "multi-CS completion context %d still waiting when calling force completion\n",
+                               i);
+               complete_all(&mcs_compl->completion);
+               spin_unlock(&mcs_compl->lock);
+       }
+}
+
+/*
+ * complete_multi_cs - complete all waiting entities on multi-CS
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @cs: CS structure
+ * The function signals a waiting entity that has overlapping stream masters
+ * with the completed CS.
+ * For example:
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ *   is actively waiting on stream master QIDs 3, 5. don't send signal as no
+ *   common stream master QID
+ * - a completed CS worked on stream master QID 4, multi CS completion
+ *   is actively waiting on stream master QIDs 3, 4. send signal as stream
+ *   master QID 4 is common
+ */
+static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs)
+{
+       struct hl_fence *fence = cs->fence;
+       int i;
+
+       /* in case of multi CS check for completion only for the first CS */
+       if (cs->staged_cs && !cs->staged_first)
+               return;
+
+       for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+               struct multi_cs_completion *mcs_compl;
+
+               mcs_compl = &hdev->multi_cs_completion[i];
+               if (!mcs_compl->used)
+                       continue;
+
+               spin_lock(&mcs_compl->lock);
+
+               /*
+                * complete if:
+                * 1. still waiting for completion
+                * 2. the completed CS has at least one overlapping stream
+                *    master with the stream masters in the completion
+                */
+               if (mcs_compl->used &&
+                               (fence->stream_master_qid_map &
+                                       mcs_compl->stream_master_qid_map)) {
+                       /* extract the timestamp only of first completed CS */
+                       if (!mcs_compl->timestamp)
+                               mcs_compl->timestamp =
+                                               ktime_to_ns(fence->timestamp);
+                       complete_all(&mcs_compl->completion);
+               }
+
+               spin_unlock(&mcs_compl->lock);
+       }
+}
+
+static inline void cs_release_sob_reset_handler(struct hl_device *hdev,
+                                       struct hl_cs *cs,
+                                       struct hl_cs_compl *hl_cs_cmpl)
+{
+       /* Skip this handler if the cs wasn't submitted, to avoid putting
+        * the hw_sob twice, since this case is already handled at this point.
+        * Also skip if the hw_sob pointer wasn't set.
+        */
+       if (!hl_cs_cmpl->hw_sob || !cs->submitted)
+               return;
+
+       spin_lock(&hl_cs_cmpl->lock);
+
+       /*
+        * we get refcount upon reservation of signals or signal/wait cs for the
+        * hw_sob object, and need to put it when the first staged cs
+        * (which contains the encaps signals) or cs signal/wait is completed.
+        */
+       if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) ||
+                       (hl_cs_cmpl->type == CS_TYPE_WAIT) ||
+                       (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) ||
+                       (!!hl_cs_cmpl->encaps_signals)) {
+               dev_dbg(hdev->dev,
+                               "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n",
+                               hl_cs_cmpl->cs_seq,
+                               hl_cs_cmpl->type,
+                               hl_cs_cmpl->hw_sob->sob_id,
+                               hl_cs_cmpl->sob_val);
+
+               hw_sob_put(hl_cs_cmpl->hw_sob);
+
+               if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT)
+                       hdev->asic_funcs->reset_sob_group(hdev,
+                                       hl_cs_cmpl->sob_group);
+       }
+
+       spin_unlock(&hl_cs_cmpl->lock);
+}
+
 static void cs_do_release(struct kref *ref)
 {
        struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
        struct hl_device *hdev = cs->ctx->hdev;
        struct hl_cs_job *job, *tmp;
+       struct hl_cs_compl *hl_cs_cmpl =
+                       container_of(cs->fence, struct hl_cs_compl, base_fence);
 
        cs->completed = true;
 
@@ -493,8 +596,9 @@ static void cs_do_release(struct kref *ref)
                complete_job(hdev, job);
 
        if (!cs->submitted) {
-               /* In case the wait for signal CS was submitted, the put occurs
-                * in init_signal_wait_cs() or collective_wait_init_cs()
+               /*
+                * In case the wait for signal CS was submitted, the fence put
+                * occurs in init_signal_wait_cs() or collective_wait_init_cs()
                 * right before hanging on the PQ.
                 */
                if (cs->type == CS_TYPE_WAIT ||
@@ -535,8 +639,20 @@ static void cs_do_release(struct kref *ref)
                        list_del(&cs->staged_cs_node);
                        spin_unlock(&hdev->cs_mirror_lock);
                }
+
+               /* decrement refcount to handle when first staged cs
+                * with encaps signals is completed.
+                */
+               if (hl_cs_cmpl->encaps_signals)
+                       kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount,
+                                               hl_encaps_handle_do_release);
        }
 
+       if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
+                       && cs->encaps_signals)
+               kref_put(&cs->encaps_sig_hdl->refcount,
+                                       hl_encaps_handle_do_release);
+
 out:
        /* Must be called before hl_ctx_put because inside we use ctx to get
         * the device
@@ -566,6 +682,10 @@ out:
        if (cs->timestamp)
                cs->fence->timestamp = ktime_get();
        complete_all(&cs->fence->completion);
+       complete_multi_cs(hdev, cs);
+
+       cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl);
+
        hl_fence_put(cs->fence);
 
        kfree(cs->jobs_in_queue_cnt);
@@ -621,6 +741,10 @@ static void cs_timedout(struct work_struct *work)
                break;
        }
 
+       rc = hl_state_dump(hdev);
+       if (rc)
+               dev_err(hdev->dev, "Error during system state dump %d\n", rc);
+
        cs_put(cs);
 
        if (likely(!skip_reset_on_timeout)) {
@@ -661,6 +785,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        cs->completed = false;
        cs->type = cs_type;
        cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
+       cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
        cs->timeout_jiffies = timeout;
        cs->skip_reset_on_timeout =
                hdev->skip_reset_on_timeout ||
@@ -671,9 +796,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        kref_init(&cs->refcount);
        spin_lock_init(&cs->job_lock);
 
-       cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
+       cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC);
        if (!cs_cmpl)
-               cs_cmpl = kmalloc(sizeof(*cs_cmpl), GFP_KERNEL);
+               cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL);
 
        if (!cs_cmpl) {
                atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
@@ -698,7 +823,6 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        cs_cmpl->hdev = hdev;
        cs_cmpl->type = cs->type;
        spin_lock_init(&cs_cmpl->lock);
-       INIT_WORK(&cs_cmpl->sob_reset_work, sob_reset_work);
        cs->fence = &cs_cmpl->base_fence;
 
        spin_lock(&ctx->cs_lock);
@@ -791,31 +915,22 @@ void hl_cs_rollback_all(struct hl_device *hdev)
                cs_rollback(hdev, cs);
                cs_put(cs);
        }
-}
-
-void hl_pending_cb_list_flush(struct hl_ctx *ctx)
-{
-       struct hl_pending_cb *pending_cb, *tmp;
 
-       list_for_each_entry_safe(pending_cb, tmp,
-                       &ctx->pending_cb_list, cb_node) {
-               list_del(&pending_cb->cb_node);
-               hl_cb_put(pending_cb->cb);
-               kfree(pending_cb);
-       }
+       force_complete_multi_cs(hdev);
 }
 
 static void
 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt)
 {
        struct hl_user_pending_interrupt *pend;
+       unsigned long flags;
 
-       spin_lock(&interrupt->wait_list_lock);
+       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_for_each_entry(pend, &interrupt->wait_list_head, wait_list_node) {
                pend->fence.error = -EIO;
                complete_all(&pend->fence.completion);
        }
-       spin_unlock(&interrupt->wait_list_lock);
+       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 }
 
 void hl_release_pending_user_interrupts(struct hl_device *hdev)
@@ -981,6 +1096,10 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
                return CS_TYPE_WAIT;
        else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT)
                return CS_TYPE_COLLECTIVE_WAIT;
+       else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY)
+               return CS_RESERVE_SIGNALS;
+       else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
+               return CS_UNRESERVE_SIGNALS;
        else
                return CS_TYPE_DEFAULT;
 }
@@ -1081,7 +1200,8 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
 }
 
 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
-                               u64 sequence, u32 flags)
+                               u64 sequence, u32 flags,
+                               u32 encaps_signal_handle)
 {
        if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
                return 0;
@@ -1093,6 +1213,9 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
                /* Staged CS sequence is the first CS sequence */
                INIT_LIST_HEAD(&cs->staged_cs_node);
                cs->staged_sequence = cs->sequence;
+
+               if (cs->encaps_signals)
+                       cs->encaps_sig_hdl_id = encaps_signal_handle;
        } else {
                /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
                 * under the cs_mirror_lock
@@ -1108,9 +1231,20 @@ static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
        return 0;
 }
 
+static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid)
+{
+       int i;
+
+       for (i = 0; i < hdev->stream_master_qid_arr_size; i++)
+               if (qid == hdev->stream_master_qid_arr[i])
+                       return BIT(i);
+
+       return 0;
+}
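get_stream_master_qid_mask() maps a queue ID to a single bit so that a CS can record which stream masters it touched. A small user-space illustration of how such a map accumulates (the QID values are made up for the example):

#include <stdint.h>
#include <stdio.h>

#define BIT(n)	(1u << (n))

/* hypothetical stream-master QIDs, analogous to hdev->stream_master_qid_arr */
static const uint32_t master_qids[] = { 4, 8, 12, 16 };

static uint32_t qid_to_mask(uint32_t qid)
{
	unsigned int i;

	for (i = 0; i < sizeof(master_qids) / sizeof(master_qids[0]); i++)
		if (qid == master_qids[i])
			return BIT(i);

	return 0;	/* not a stream master queue */
}

int main(void)
{
	uint32_t cs_qids[] = { 8, 16, 3 };	/* queues used by one CS */
	uint32_t map = 0;
	unsigned int i;

	for (i = 0; i < 3; i++)
		map |= qid_to_mask(cs_qids[i]);

	printf("stream master map: 0x%x\n", map);	/* 0xa: bits 1 and 3 */
	return 0;
}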
+
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                                u32 num_chunks, u64 *cs_seq, u32 flags,
-                               u32 timeout)
+                               u32 encaps_signals_handle, u32 timeout)
 {
        bool staged_mid, int_queues_only = true;
        struct hl_device *hdev = hpriv->hdev;
@@ -1121,6 +1255,7 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        struct hl_cs *cs;
        struct hl_cb *cb;
        u64 user_sequence;
+       u8 stream_master_qid_map = 0;
        int rc, i;
 
        cntr = &hdev->aggregated_cs_counters;
@@ -1148,7 +1283,8 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
 
        hl_debugfs_add_cs(cs);
 
-       rc = cs_staged_submission(hdev, cs, user_sequence, flags);
+       rc = cs_staged_submission(hdev, cs, user_sequence, flags,
+                                               encaps_signals_handle);
        if (rc)
                goto free_cs_object;
 
@@ -1179,9 +1315,20 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                        cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
                }
 
-               if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
+               if (queue_type == QUEUE_TYPE_EXT ||
+                                               queue_type == QUEUE_TYPE_HW) {
                        int_queues_only = false;
 
+                       /*
+                        * store which streams are being used for the
+                        * external/HW queues of this CS
+                        */
+                       if (hdev->supports_wait_for_multi_cs)
+                               stream_master_qid_map |=
+                                       get_stream_master_qid_mask(hdev,
+                                                       chunk->queue_index);
+               }
+
                job = hl_cs_allocate_job(hdev, queue_type,
                                                is_kernel_allocated_cb);
                if (!job) {
@@ -1242,6 +1389,13 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                goto free_cs_object;
        }
 
+       /*
+        * store the (external/HW queues) streams used by the CS in the
+        * fence object for multi-CS completion
+        */
+       if (hdev->supports_wait_for_multi_cs)
+               cs->fence->stream_master_qid_map = stream_master_qid_map;
+
        rc = hl_hw_queue_schedule_cs(cs);
        if (rc) {
                if (rc != -EAGAIN)
@@ -1270,130 +1424,6 @@ out:
        return rc;
 }
 
-static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
-               struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
-{
-       struct hw_queue_properties *hw_queue_prop;
-       struct hl_cs_counters_atomic *cntr;
-       struct hl_cs_job *job;
-
-       hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
-       cntr = &hdev->aggregated_cs_counters;
-
-       job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
-       if (!job) {
-               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
-               atomic64_inc(&cntr->out_of_mem_drop_cnt);
-               dev_err(hdev->dev, "Failed to allocate a new job\n");
-               return -ENOMEM;
-       }
-
-       job->id = 0;
-       job->cs = cs;
-       job->user_cb = cb;
-       atomic_inc(&job->user_cb->cs_cnt);
-       job->user_cb_size = size;
-       job->hw_queue_id = hw_queue_id;
-       job->patched_cb = job->user_cb;
-       job->job_cb_size = job->user_cb_size;
-
-       /* increment refcount as for external queues we get completion */
-       cs_get(cs);
-
-       cs->jobs_in_queue_cnt[job->hw_queue_id]++;
-
-       list_add_tail(&job->cs_node, &cs->job_list);
-
-       hl_debugfs_add_job(hdev, job);
-
-       return 0;
-}
-
-static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
-{
-       struct hl_device *hdev = hpriv->hdev;
-       struct hl_ctx *ctx = hpriv->ctx;
-       struct hl_pending_cb *pending_cb, *tmp;
-       struct list_head local_cb_list;
-       struct hl_cs *cs;
-       struct hl_cb *cb;
-       u32 hw_queue_id;
-       u32 cb_size;
-       int process_list, rc = 0;
-
-       if (list_empty(&ctx->pending_cb_list))
-               return 0;
-
-       process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
-
-       /* Only a single thread is allowed to process the list */
-       if (!process_list)
-               return 0;
-
-       if (list_empty(&ctx->pending_cb_list))
-               goto free_pending_cb_token;
-
-       /* move all list elements to a local list */
-       INIT_LIST_HEAD(&local_cb_list);
-       spin_lock(&ctx->pending_cb_lock);
-       list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
-                                                               cb_node)
-               list_move_tail(&pending_cb->cb_node, &local_cb_list);
-       spin_unlock(&ctx->pending_cb_lock);
-
-       rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs, 0,
-                               hdev->timeout_jiffies);
-       if (rc)
-               goto add_list_elements;
-
-       hl_debugfs_add_cs(cs);
-
-       /* Iterate through pending cb list, create jobs and add to CS */
-       list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
-               cb = pending_cb->cb;
-               cb_size = pending_cb->cb_size;
-               hw_queue_id = pending_cb->hw_queue_id;
-
-               rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
-                                                               hw_queue_id);
-               if (rc)
-                       goto free_cs_object;
-       }
-
-       rc = hl_hw_queue_schedule_cs(cs);
-       if (rc) {
-               if (rc != -EAGAIN)
-                       dev_err(hdev->dev,
-                               "Failed to submit CS %d.%llu (%d)\n",
-                               ctx->asid, cs->sequence, rc);
-               goto free_cs_object;
-       }
-
-       /* pending cb was scheduled successfully */
-       list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
-               list_del(&pending_cb->cb_node);
-               kfree(pending_cb);
-       }
-
-       cs_put(cs);
-
-       goto free_pending_cb_token;
-
-free_cs_object:
-       cs_rollback(hdev, cs);
-       cs_put(cs);
-add_list_elements:
-       spin_lock(&ctx->pending_cb_lock);
-       list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
-                                                               cb_node)
-               list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
-       spin_unlock(&ctx->pending_cb_lock);
-free_pending_cb_token:
-       atomic_set(&ctx->thread_pending_cb_token, 1);
-
-       return rc;
-}
-
 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
                                u64 *cs_seq)
 {
@@ -1443,7 +1473,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
                        rc = 0;
                } else {
                        rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-                                       cs_seq, 0, hdev->timeout_jiffies);
+                                       cs_seq, 0, 0, hdev->timeout_jiffies);
                }
 
                mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1501,10 +1531,17 @@ out:
  * hl_cs_signal_sob_wraparound_handler: handle the SOB value wraparound case.
  * If the SOB value reaches the max value, move to the other SOB reserved
  * for the queue.
+ * @hdev: pointer to device structure
+ * @q_idx: stream queue index
+ * @hw_sob: the H/W SOB used in this signal CS.
+ * @count: signals count
+ * @encaps_sig: true if this is a reservation for encapsulated signals.
+ *
  * Note that this function must be called while hw_queues_lock is taken.
  */
 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
-                       struct hl_hw_sob **hw_sob, u32 count)
+                       struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig)
 {
        struct hl_sync_stream_properties *prop;
        struct hl_hw_sob *sob = *hw_sob, *other_sob;
@@ -1512,7 +1549,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 
        prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
 
-       kref_get(&sob->kref);
+       hw_sob_get(sob);
 
        /* check for wraparound */
        if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) {
@@ -1522,7 +1559,7 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
                 * just incremented the refcount right before calling this
                 * function.
                 */
-               kref_put(&sob->kref, hl_sob_reset_error);
+               hw_sob_put_err(sob);
 
                /*
                 * check the other sob value, if it still in use then fail
@@ -1537,12 +1574,42 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
                        return -EINVAL;
                }
 
-               prop->next_sob_val = 1;
+               /*
+                * next_sob_val always points to the next available signal
+                * in the SOB, so for encaps signals it will be the one right
+                * after the reserved amount.
+                */
+               if (encaps_sig)
+                       prop->next_sob_val = count + 1;
+               else
+                       prop->next_sob_val = count;
 
                /* only two SOBs are currently in use */
                prop->curr_sob_offset = other_sob_offset;
                *hw_sob = other_sob;
 
+               /*
+                * Check if other_sob needs a reset and, if so, do it before
+                * using it for the reservation or the next signal CS.
+                * We do it here, for both the encaps and the regular signal CS
+                * cases, to avoid a possible race of two concurrent kref_put
+                * calls on the SOB, which could happen if we moved the SOB
+                * reset (kref_put) to cs_do_release().
+                * In addition, with a mix of signal CS and encaps
+                * reservations, it may happen that when the SOB needs a reset
+                * there are no more reservations and only signal CSs keep
+                * coming; in that case the signal CS must put the refcount and
+                * reset the SOB.
+                */
+               if (other_sob->need_reset)
+                       hw_sob_put(other_sob);
+
+               if (encaps_sig) {
+                       /* set reset indication for the sob */
+                       sob->need_reset = true;
+                       hw_sob_get(other_sob);
+               }
+
                dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n",
                                prop->curr_sob_offset, q_idx);
        } else {
@@ -1553,12 +1620,18 @@ int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
 }
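For illustration (made-up numbers): if next_sob_val is HL_MAX_SOB_VAL - 4 and a request arrives with count = 10, the sum crosses HL_MAX_SOB_VAL, so the handler switches to the queue's other SOB, provided that SOB is no longer in use. On the new SOB the assignments above leave next_sob_val = 11 (count + 1) for an encapsulated-signals reservation, or next_sob_val = 10 (count) for a regular signal CS.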
 
 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
-               struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx)
+               struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx,
+               bool encaps_signals)
 {
        u64 *signal_seq_arr = NULL;
        u32 size_to_copy, signal_seq_arr_len;
        int rc = 0;
 
+       if (encaps_signals) {
+               *signal_seq = chunk->encaps_signal_seq;
+               return 0;
+       }
+
        signal_seq_arr_len = chunk->num_signal_seq_arr;
 
        /* currently only one signal seq is supported */
@@ -1583,7 +1656,7 @@ static int cs_ioctl_extract_signal_seq(struct hl_device *hdev,
                return -ENOMEM;
        }
 
-       size_to_copy = chunk->num_signal_seq_arr * sizeof(*signal_seq_arr);
+       size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr);
        if (copy_from_user(signal_seq_arr,
                                u64_to_user_ptr(chunk->signal_seq_arr),
                                size_to_copy)) {
@@ -1605,8 +1678,8 @@ out:
 }
 
 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
-               struct hl_ctx *ctx, struct hl_cs *cs, enum hl_queue_type q_type,
-               u32 q_idx)
+               struct hl_ctx *ctx, struct hl_cs *cs,
+               enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset)
 {
        struct hl_cs_counters_atomic *cntr;
        struct hl_cs_job *job;
@@ -1644,6 +1717,9 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
        job->user_cb_size = cb_size;
        job->hw_queue_id = q_idx;
 
+       if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT)
+                       && cs->encaps_signals)
+               job->encaps_sig_wait_offset = encaps_signal_offset;
        /*
         * No need in parsing, user CB is the patched CB.
         * We call hl_cb_destroy() out of two reasons - we don't need the CB in
@@ -1666,75 +1742,307 @@ static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev,
        return 0;
 }
 
-static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
-                               void __user *chunks, u32 num_chunks,
-                               u64 *cs_seq, u32 flags, u32 timeout)
+static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv,
+                               u32 q_idx, u32 count,
+                               u32 *handle_id, u32 *sob_addr,
+                               u32 *signals_count)
 {
-       struct hl_cs_chunk *cs_chunk_array, *chunk;
        struct hw_queue_properties *hw_queue_prop;
+       struct hl_sync_stream_properties *prop;
        struct hl_device *hdev = hpriv->hdev;
-       struct hl_cs_compl *sig_waitcs_cmpl;
-       u32 q_idx, collective_engine_id = 0;
-       struct hl_cs_counters_atomic *cntr;
-       struct hl_fence *sig_fence = NULL;
-       struct hl_ctx *ctx = hpriv->ctx;
-       enum hl_queue_type q_type;
-       struct hl_cs *cs;
-       u64 signal_seq;
-       int rc;
-
-       cntr = &hdev->aggregated_cs_counters;
-       *cs_seq = ULLONG_MAX;
+       struct hl_cs_encaps_sig_handle *handle;
+       struct hl_encaps_signals_mgr *mgr;
+       struct hl_hw_sob *hw_sob;
+       int hdl_id;
+       int rc = 0;
 
-       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
-                       ctx);
-       if (rc)
+       if (count >= HL_MAX_SOB_VAL) {
+               dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n",
+                                               count);
+               rc = -EINVAL;
                goto out;
+       }
 
-       /* currently it is guaranteed to have only one chunk */
-       chunk = &cs_chunk_array[0];
-
-       if (chunk->queue_index >= hdev->asic_prop.max_queues) {
-               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
-               atomic64_inc(&cntr->validation_drop_cnt);
+       if (q_idx >= hdev->asic_prop.max_queues) {
                dev_err(hdev->dev, "Queue index %d is invalid\n",
-                       chunk->queue_index);
+                       q_idx);
                rc = -EINVAL;
-               goto free_cs_chunk_array;
+               goto out;
        }
 
-       q_idx = chunk->queue_index;
        hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
-       q_type = hw_queue_prop->type;
 
        if (!hw_queue_prop->supports_sync_stream) {
-               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
-               atomic64_inc(&cntr->validation_drop_cnt);
                dev_err(hdev->dev,
                        "Queue index %d does not support sync stream operations\n",
-                       q_idx);
+                                                                       q_idx);
                rc = -EINVAL;
-               goto free_cs_chunk_array;
+               goto out;
        }
 
-       if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-               if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
-                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
-                       atomic64_inc(&cntr->validation_drop_cnt);
-                       dev_err(hdev->dev,
-                               "Queue index %d is invalid\n", q_idx);
-                       rc = -EINVAL;
-                       goto free_cs_chunk_array;
-               }
+       prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
 
-               collective_engine_id = chunk->collective_engine_id;
-       }
+       handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+       if (!handle) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       handle->count = count;
+       mgr = &hpriv->ctx->sig_mgr;
+
+       spin_lock(&mgr->lock);
+       hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
+       spin_unlock(&mgr->lock);
+
+       if (hdl_id < 0) {
+               dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n");
+               rc = -EINVAL;
+               goto out;
+       }
+
+       handle->id = hdl_id;
+       handle->q_idx = q_idx;
+       handle->hdev = hdev;
+       kref_init(&handle->refcount);
+
+       hdev->asic_funcs->hw_queues_lock(hdev);
+
+       hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+
+       /*
+        * Increment the SOB value by the user-requested count in order to
+        * reserve those signals.
+        * Check that the amount of signals to reserve does not exceed the max
+        * SOB value; if it does, switch to the other SOB.
+        */
+       rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count,
+                                                               true);
+       if (rc) {
+               dev_err(hdev->dev, "Failed to switch SOB\n");
+               hdev->asic_funcs->hw_queues_unlock(hdev);
+               rc = -EINVAL;
+               goto remove_idr;
+       }
+       /* Set the hw_sob in the handle only after calling the SOB wraparound
+        * handler, since the SOB could have changed.
+        */
+       handle->hw_sob = hw_sob;
+
+       /* Store the current SOB value for the unreserve validity check and
+        * for signal offset support.
+        */
+       handle->pre_sob_val = prop->next_sob_val - handle->count;
+
+       *signals_count = prop->next_sob_val;
+       hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       *sob_addr = handle->hw_sob->sob_addr;
+       *handle_id = hdl_id;
+
+       dev_dbg(hdev->dev,
+               "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n",
+                       hw_sob->sob_id, handle->hw_sob->sob_addr,
+                       prop->next_sob_val - 1, q_idx, hdl_id);
+       goto out;
+
+remove_idr:
+       spin_lock(&mgr->lock);
+       idr_remove(&mgr->handles, hdl_id);
+       spin_unlock(&mgr->lock);
+
+       kfree(handle);
+out:
+       return rc;
+}
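The reservation path above hands a handle ID back to user space through an IDR that is protected by a spinlock, which is why the allocation uses GFP_ATOMIC. A stripped-down sketch of that allocate/lookup/remove pattern (demo_mgr and its helpers are illustrative names, not driver code):

#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct demo_mgr {
	spinlock_t lock;
	struct idr handles;
};

static void demo_mgr_init(struct demo_mgr *mgr)
{
	spin_lock_init(&mgr->lock);
	idr_init(&mgr->handles);
}

/* allocate an ID >= 1 for @handle; GFP_ATOMIC because the lock is held */
static int demo_mgr_add(struct demo_mgr *mgr, void *handle)
{
	int id;

	spin_lock(&mgr->lock);
	id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
	spin_unlock(&mgr->lock);

	return id;	/* negative errno on failure */
}

static void *demo_mgr_find(struct demo_mgr *mgr, int id)
{
	void *handle;

	spin_lock(&mgr->lock);
	handle = idr_find(&mgr->handles, id);
	spin_unlock(&mgr->lock);

	return handle;
}

static void demo_mgr_del(struct demo_mgr *mgr, int id)
{
	spin_lock(&mgr->lock);
	idr_remove(&mgr->handles, id);
	spin_unlock(&mgr->lock);
}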
+
+static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id)
+{
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+       struct hl_sync_stream_properties *prop;
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_encaps_signals_mgr *mgr;
+       struct hl_hw_sob *hw_sob;
+       u32 q_idx, sob_addr;
+       int rc = 0;
+
+       mgr = &hpriv->ctx->sig_mgr;
+
+       spin_lock(&mgr->lock);
+       encaps_sig_hdl = idr_find(&mgr->handles, handle_id);
+       if (encaps_sig_hdl) {
+               dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n",
+                               handle_id, encaps_sig_hdl->hw_sob->sob_addr,
+                                       encaps_sig_hdl->count);
+
+               hdev->asic_funcs->hw_queues_lock(hdev);
+
+               q_idx = encaps_sig_hdl->q_idx;
+               prop = &hdev->kernel_queues[q_idx].sync_stream_prop;
+               hw_sob = &prop->hw_sob[prop->curr_sob_offset];
+               sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
+
+               /* Check if sob_val went out of sync due to other signal
+                * submission requests that were handled between the reserve
+                * and unreserve calls, or due to a SOB switch upon reaching
+                * the max SOB value.
+                */
+               if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count
+                               != prop->next_sob_val ||
+                               sob_addr != encaps_sig_hdl->hw_sob->sob_addr) {
+                       dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n",
+                               encaps_sig_hdl->pre_sob_val,
+                               (prop->next_sob_val - encaps_sig_hdl->count));
+
+                       hdev->asic_funcs->hw_queues_unlock(hdev);
+                       rc = -EINVAL;
+                       goto out;
+               }
+
+               /*
+                * Decrement the SOB value by the user-requested count in
+                * order to unreserve those signals
+                */
+               prop->next_sob_val -= encaps_sig_hdl->count;
 
-       if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT) {
-               rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, ctx);
+               hdev->asic_funcs->hw_queues_unlock(hdev);
+
+               hw_sob_put(hw_sob);
+
+               /* Release the id and free allocated memory of the handle */
+               idr_remove(&mgr->handles, handle_id);
+               kfree(encaps_sig_hdl);
+       } else {
+               rc = -EINVAL;
+               dev_err(hdev->dev, "failed to unreserve signals, cannot find handle\n");
+       }
+out:
+       spin_unlock(&mgr->lock);
+
+       return rc;
+}
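As a made-up example of the validity check above: if a reservation of count = 16 was taken while next_sob_val was 5, pre_sob_val is stored as 5 and next_sob_val becomes 21. The unreserve succeeds only while next_sob_val is still 21 (pre_sob_val + count) and the SOB address is unchanged; any signal CS submitted in between, or a SOB switch, makes the check fail with -EINVAL.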
+
+static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
+                               void __user *chunks, u32 num_chunks,
+                               u64 *cs_seq, u32 flags, u32 timeout)
+{
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL;
+       bool handle_found = false, is_wait_cs = false,
+                       wait_cs_submitted = false,
+                       cs_encaps_signals = false;
+       struct hl_cs_chunk *cs_chunk_array, *chunk;
+       bool staged_cs_with_encaps_signals = false;
+       struct hw_queue_properties *hw_queue_prop;
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_cs_compl *sig_waitcs_cmpl;
+       u32 q_idx, collective_engine_id = 0;
+       struct hl_cs_counters_atomic *cntr;
+       struct hl_fence *sig_fence = NULL;
+       struct hl_ctx *ctx = hpriv->ctx;
+       enum hl_queue_type q_type;
+       struct hl_cs *cs;
+       u64 signal_seq;
+       int rc;
+
+       cntr = &hdev->aggregated_cs_counters;
+       *cs_seq = ULLONG_MAX;
+
+       rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
+                       ctx);
+       if (rc)
+               goto out;
+
+       /* currently it is guaranteed to have only one chunk */
+       chunk = &cs_chunk_array[0];
+
+       if (chunk->queue_index >= hdev->asic_prop.max_queues) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
+               dev_err(hdev->dev, "Queue index %d is invalid\n",
+                       chunk->queue_index);
+               rc = -EINVAL;
+               goto free_cs_chunk_array;
+       }
+
+       q_idx = chunk->queue_index;
+       hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx];
+       q_type = hw_queue_prop->type;
+
+       if (!hw_queue_prop->supports_sync_stream) {
+               atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+               atomic64_inc(&cntr->validation_drop_cnt);
+               dev_err(hdev->dev,
+                       "Queue index %d does not support sync stream operations\n",
+                       q_idx);
+               rc = -EINVAL;
+               goto free_cs_chunk_array;
+       }
+
+       if (cs_type == CS_TYPE_COLLECTIVE_WAIT) {
+               if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) {
+                       atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
+                       atomic64_inc(&cntr->validation_drop_cnt);
+                       dev_err(hdev->dev,
+                               "Queue index %d is invalid\n", q_idx);
+                       rc = -EINVAL;
+                       goto free_cs_chunk_array;
+               }
+
+               collective_engine_id = chunk->collective_engine_id;
+       }
+
+       is_wait_cs = !!(cs_type == CS_TYPE_WAIT ||
+                       cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+       cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS);
+
+       if (is_wait_cs) {
+               rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq,
+                               ctx, cs_encaps_signals);
                if (rc)
                        goto free_cs_chunk_array;
 
+               if (cs_encaps_signals) {
+                       /* check if the CS sequence has an encapsulated
+                        * signals handle
+                        */
+                       struct idr *idp;
+                       u32 id;
+
+                       spin_lock(&ctx->sig_mgr.lock);
+                       idp = &ctx->sig_mgr.handles;
+                       idr_for_each_entry(idp, encaps_sig_hdl, id) {
+                               if (encaps_sig_hdl->cs_seq == signal_seq) {
+                                       handle_found = true;
+                                       /* Take a refcount to protect the
+                                        * handle from being removed from the
+                                        * IDR; this is needed when multiple
+                                        * wait CSs with offsets wait on
+                                        * reserved encaps signals.
+                                        */
+                                       kref_get(&encaps_sig_hdl->refcount);
+                                       break;
+                               }
+                       }
+                       spin_unlock(&ctx->sig_mgr.lock);
+
+                       if (!handle_found) {
+                               dev_err(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n",
+                                               signal_seq);
+                               rc = -EINVAL;
+                               goto free_cs_chunk_array;
+                       }
+
+                       /* validate also the signal offset value */
+                       if (chunk->encaps_signal_offset >
+                                       encaps_sig_hdl->count) {
+                               dev_err(hdev->dev, "offset(%u) value exceeds max reserved signals count(%u)!\n",
+                                               chunk->encaps_signal_offset,
+                                               encaps_sig_hdl->count);
+                               rc = -EINVAL;
+                               goto free_cs_chunk_array;
+                       }
+               }
+
                sig_fence = hl_ctx_get_fence(ctx, signal_seq);
                if (IS_ERR(sig_fence)) {
                        atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
@@ -1755,11 +2063,16 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
                sig_waitcs_cmpl =
                        container_of(sig_fence, struct hl_cs_compl, base_fence);
 
-               if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL) {
+               staged_cs_with_encaps_signals = !!
+                               (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT &&
+                               (flags & HL_CS_FLAGS_ENCAP_SIGNALS));
+
+               if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL &&
+                               !staged_cs_with_encaps_signals) {
                        atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
                        atomic64_inc(&cntr->validation_drop_cnt);
                        dev_err(hdev->dev,
-                               "CS seq 0x%llx is not of a signal CS\n",
+                               "CS seq 0x%llx is not of a signal/encaps-signal CS\n",
                                signal_seq);
                        hl_fence_put(sig_fence);
                        rc = -EINVAL;
@@ -1776,18 +2089,27 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
        rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout);
        if (rc) {
-               if (cs_type == CS_TYPE_WAIT ||
-                       cs_type == CS_TYPE_COLLECTIVE_WAIT)
+               if (is_wait_cs)
                        hl_fence_put(sig_fence);
+
                goto free_cs_chunk_array;
        }
 
        /*
         * Save the signal CS fence for later initialization right before
         * hanging the wait CS on the queue.
+        * For the encaps signals case, we save the CS sequence and the handle
+        * pointer for later initialization.
         */
-       if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_COLLECTIVE_WAIT)
+       if (is_wait_cs) {
                cs->signal_fence = sig_fence;
+               /* Store the handle pointer so we don't have to look it up
+                * again later in the flow, when we need to set the SOB info
+                * in the hw_queue.
+                */
+               if (cs->encaps_signals)
+                       cs->encaps_sig_hdl = encaps_sig_hdl;
+       }
 
        hl_debugfs_add_cs(cs);
 
@@ -1795,10 +2117,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
        if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL)
                rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type,
-                               q_idx);
+                               q_idx, chunk->encaps_signal_offset);
        else if (cs_type == CS_TYPE_COLLECTIVE_WAIT)
                rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx,
-                               cs, q_idx, collective_engine_id);
+                               cs, q_idx, collective_engine_id,
+                               chunk->encaps_signal_offset);
        else {
                atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
                atomic64_inc(&cntr->validation_drop_cnt);
@@ -1810,7 +2133,13 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
 
        rc = hl_hw_queue_schedule_cs(cs);
        if (rc) {
-               if (rc != -EAGAIN)
+               /* If the wait CS failed here, it means the signal CS has
+                * already completed. We want to free all of its related
+                * objects, but we don't want to fail the ioctl.
+                */
+               if (is_wait_cs)
+                       rc = 0;
+               else if (rc != -EAGAIN)
                        dev_err(hdev->dev,
                                "Failed to submit CS %d.%llu to H/W queues, error %d\n",
                                ctx->asid, cs->sequence, rc);
@@ -1818,6 +2147,8 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
        }
 
        rc = HL_CS_STATUS_SUCCESS;
+       if (is_wait_cs)
+               wait_cs_submitted = true;
        goto put_cs;
 
 free_cs_object:
@@ -1828,6 +2159,10 @@ put_cs:
        /* We finished with the CS in this function, so put the ref */
        cs_put(cs);
 free_cs_chunk_array:
+       if (!wait_cs_submitted && cs_encaps_signals && handle_found &&
+                                                       is_wait_cs)
+               kref_put(&encaps_sig_hdl->refcount,
+                               hl_encaps_handle_do_release);
        kfree(cs_chunk_array);
 out:
        return rc;
@@ -1836,10 +2171,11 @@ out:
 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
 {
        union hl_cs_args *args = data;
-       enum hl_cs_type cs_type;
+       enum hl_cs_type cs_type = 0;
        u64 cs_seq = ULONG_MAX;
        void __user *chunks;
-       u32 num_chunks, flags, timeout;
+       u32 num_chunks, flags, timeout,
+               signals_count = 0, sob_addr = 0, handle_id = 0;
        int rc;
 
        rc = hl_cs_sanity_checks(hpriv, args);
@@ -1850,10 +2186,6 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
        if (rc)
                goto out;
 
-       rc = hl_submit_pending_cb(hpriv);
-       if (rc)
-               goto out;
-
        cs_type = hl_cs_get_cs_type(args->in.cs_flags &
                                        ~HL_CS_FLAGS_FORCE_RESTORE);
        chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
@@ -1876,80 +2208,448 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
                rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks,
                                        &cs_seq, args->in.cs_flags, timeout);
                break;
+       case CS_RESERVE_SIGNALS:
+               rc = cs_ioctl_reserve_signals(hpriv,
+                                       args->in.encaps_signals_q_idx,
+                                       args->in.encaps_signals_count,
+                                       &handle_id, &sob_addr, &signals_count);
+               break;
+       case CS_UNRESERVE_SIGNALS:
+               rc = cs_ioctl_unreserve_signals(hpriv,
+                                       args->in.encaps_sig_handle_id);
+               break;
        default:
                rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
-                                               args->in.cs_flags, timeout);
+                                               args->in.cs_flags,
+                                               args->in.encaps_sig_handle_id,
+                                               timeout);
                break;
        }
-
 out:
        if (rc != -EAGAIN) {
                memset(args, 0, sizeof(*args));
+
+               if (cs_type == CS_RESERVE_SIGNALS) {
+                       args->out.handle_id = handle_id;
+                       args->out.sob_base_addr_offset = sob_addr;
+                       args->out.count = signals_count;
+               } else {
+                       args->out.seq = cs_seq;
+               }
                args->out.status = rc;
-               args->out.seq = cs_seq;
        }
 
        return rc;
 }
 
+static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence,
+                               enum hl_cs_wait_status *status, u64 timeout_us,
+                               s64 *timestamp)
+{
+       struct hl_device *hdev = ctx->hdev;
+       long completion_rc;
+       int rc = 0;
+
+       if (IS_ERR(fence)) {
+               rc = PTR_ERR(fence);
+               if (rc == -EINVAL)
+                       dev_notice_ratelimited(hdev->dev,
+                               "Can't wait on CS %llu because current CS is at seq %llu\n",
+                               seq, ctx->cs_sequence);
+               return rc;
+       }
+
+       if (!fence) {
+               dev_dbg(hdev->dev,
+                       "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
+                               seq, ctx->cs_sequence);
+
+               *status = CS_WAIT_STATUS_GONE;
+               return 0;
+       }
+
+       if (!timeout_us) {
+               completion_rc = completion_done(&fence->completion);
+       } else {
+               unsigned long timeout;
+
+               timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ?
+                               timeout_us : usecs_to_jiffies(timeout_us);
+               completion_rc =
+                       wait_for_completion_interruptible_timeout(
+                               &fence->completion, timeout);
+       }
+
+       if (completion_rc > 0) {
+               *status = CS_WAIT_STATUS_COMPLETED;
+               if (timestamp)
+                       *timestamp = ktime_to_ns(fence->timestamp);
+       } else {
+               *status = CS_WAIT_STATUS_BUSY;
+       }
+
+       if (fence->error == -ETIMEDOUT)
+               rc = -ETIMEDOUT;
+       else if (fence->error == -EIO)
+               rc = -EIO;
+
+       return rc;
+}
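hl_wait_for_fence() treats a zero timeout as a poll: completion_done() only reports whether the completion has already fired, while a non-zero timeout sleeps on it. A minimal sketch of that poll-or-wait split on a bare struct completion (illustrative only):

#include <linux/completion.h>
#include <linux/jiffies.h>
#include <linux/types.h>

/*
 * Returns 1 if @done completed (or completes within @timeout_us),
 * 0 if it is still pending, negative errno if interrupted by a signal.
 */
static int poll_or_wait(struct completion *done, u64 timeout_us)
{
	long rc;

	if (!timeout_us)
		return completion_done(done) ? 1 : 0;

	rc = wait_for_completion_interruptible_timeout(done,
					usecs_to_jiffies(timeout_us));
	if (rc > 0)
		return 1;	/* completed before the timeout expired */
	if (rc == 0)
		return 0;	/* timed out, still busy */

	return rc;		/* -ERESTARTSYS: interrupted by a signal */
}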
+
+/*
+ * hl_cs_poll_fences - iterate CS fences to check for CS completion
+ *
+ * @mcs_data: multi-CS internal data
+ *
+ * @return 0 on success, otherwise a non-zero error code
+ *
+ * The function iterates over all CS sequences in the list and sets a bit in
+ * completion_bitmap for each completed CS.
+ * While iterating, the function extracts the stream map to be used later by
+ * the waiting function.
+ * This function must be called after taking a context ref.
+ */
+static int hl_cs_poll_fences(struct multi_cs_data *mcs_data)
+{
+       struct hl_fence **fence_ptr = mcs_data->fence_arr;
+       struct hl_device *hdev = mcs_data->ctx->hdev;
+       int i, rc, arr_len = mcs_data->arr_len;
+       u64 *seq_arr = mcs_data->seq_arr;
+       ktime_t max_ktime, first_cs_time;
+       enum hl_cs_wait_status status;
+
+       memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+
+       /* get all fences under the same lock */
+       rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
+       if (rc)
+               return rc;
+
+       /*
+        * Initialize to the maximum time so we can verify the timestamp is
+        * valid: if this value is unchanged at the end, no timestamp was
+        * updated.
+        */
+       max_ktime = ktime_set(KTIME_SEC_MAX, 0);
+       first_cs_time = max_ktime;
+
+       for (i = 0; i < arr_len; i++, fence_ptr++) {
+               struct hl_fence *fence = *fence_ptr;
+
+               /*
+                * The function won't sleep as it is called with timeout 0
+                * (i.e. it just polls the fence)
+                */
+               rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence,
+                                               &status, 0, NULL);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "wait_for_fence error :%d for CS seq %llu\n",
+                                                               rc, seq_arr[i]);
+                       break;
+               }
+
+               mcs_data->stream_master_qid_map |= fence->stream_master_qid_map;
+
+               if (status == CS_WAIT_STATUS_BUSY)
+                       continue;
+
+               mcs_data->completion_bitmap |= BIT(i);
+
+               /*
+                * Best effort to extract the timestamp. A few notes:
+                * - if even a single fence is gone we cannot extract a
+                *   timestamp (as the fence no longer exists)
+                * - for all completed CSs we take the earliest timestamp.
+                *   For this we have to validate that:
+                *       1. the given timestamp was indeed set
+                *       2. the timestamp is the earliest of all timestamps so far
+                */
+
+               if (status == CS_WAIT_STATUS_GONE) {
+                       mcs_data->update_ts = false;
+                       mcs_data->gone_cs = true;
+               } else if (mcs_data->update_ts &&
+                       (ktime_compare(fence->timestamp,
+                                               ktime_set(0, 0)) > 0) &&
+                       (ktime_compare(fence->timestamp, first_cs_time) < 0)) {
+                       first_cs_time = fence->timestamp;
+               }
+       }
+
+       hl_fences_put(mcs_data->fence_arr, arr_len);
+
+       if (mcs_data->update_ts &&
+                       (ktime_compare(first_cs_time, max_ktime) != 0))
+               mcs_data->timestamp = ktime_to_ns(first_cs_time);
+
+       return rc;
+}
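The polling loop above keeps the earliest valid fence timestamp by seeding the comparison with the maximum ktime value. A short, standalone sketch of that selection (not driver code):

#include <linux/ktime.h>

/* return the earliest non-zero timestamp in @ts, or 0 if none was set */
static ktime_t earliest_timestamp(const ktime_t *ts, int len)
{
	ktime_t earliest = ktime_set(KTIME_SEC_MAX, 0);
	int i;

	for (i = 0; i < len; i++) {
		/* skip entries that were never stamped */
		if (ktime_compare(ts[i], ktime_set(0, 0)) <= 0)
			continue;

		if (ktime_compare(ts[i], earliest) < 0)
			earliest = ts[i];
	}

	if (ktime_compare(earliest, ktime_set(KTIME_SEC_MAX, 0)) == 0)
		return ktime_set(0, 0);	/* nothing was stamped */

	return earliest;
}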
+
 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
                                u64 timeout_us, u64 seq,
                                enum hl_cs_wait_status *status, s64 *timestamp)
 {
        struct hl_fence *fence;
-       unsigned long timeout;
        int rc = 0;
-       long completion_rc;
 
        if (timestamp)
                *timestamp = 0;
 
-       if (timeout_us == MAX_SCHEDULE_TIMEOUT)
-               timeout = timeout_us;
-       else
-               timeout = usecs_to_jiffies(timeout_us);
-
        hl_ctx_get(hdev, ctx);
 
        fence = hl_ctx_get_fence(ctx, seq);
-       if (IS_ERR(fence)) {
-               rc = PTR_ERR(fence);
-               if (rc == -EINVAL)
-                       dev_notice_ratelimited(hdev->dev,
-                               "Can't wait on CS %llu because current CS is at seq %llu\n",
-                               seq, ctx->cs_sequence);
-       } else if (fence) {
-               if (!timeout_us)
-                       completion_rc = completion_done(&fence->completion);
-               else
-                       completion_rc =
-                               wait_for_completion_interruptible_timeout(
-                                       &fence->completion, timeout);
 
-               if (completion_rc > 0) {
-                       *status = CS_WAIT_STATUS_COMPLETED;
-                       if (timestamp)
-                               *timestamp = ktime_to_ns(fence->timestamp);
-               } else {
-                       *status = CS_WAIT_STATUS_BUSY;
+       rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp);
+       hl_fence_put(fence);
+       hl_ctx_put(ctx);
+
+       return rc;
+}
+
+/*
+ * hl_wait_multi_cs_completion_init - init completion structure
+ *
+ * @hdev: pointer to habanalabs device structure
+ * @stream_master_bitmap: stream master QIDs map, set bit indicates stream
+ *                        master QID to wait on
+ *
+ * @return valid completion struct pointer on success, otherwise error pointer
+ *
+ * Up to MULTI_CS_MAX_USER_CTX calls can be made to the driver concurrently.
+ * The function takes the first available completion (by marking it "used")
+ * and initializes its values.
+ */
+static struct multi_cs_completion *hl_wait_multi_cs_completion_init(
+                                                       struct hl_device *hdev,
+                                                       u8 stream_master_bitmap)
+{
+       struct multi_cs_completion *mcs_compl;
+       int i;
+
+       /* find free multi_cs completion structure */
+       for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+               mcs_compl = &hdev->multi_cs_completion[i];
+               spin_lock(&mcs_compl->lock);
+               if (!mcs_compl->used) {
+                       mcs_compl->used = 1;
+                       mcs_compl->timestamp = 0;
+                       mcs_compl->stream_master_qid_map = stream_master_bitmap;
+                       reinit_completion(&mcs_compl->completion);
+                       spin_unlock(&mcs_compl->lock);
+                       break;
                }
+               spin_unlock(&mcs_compl->lock);
+       }
 
-               if (fence->error == -ETIMEDOUT)
-                       rc = -ETIMEDOUT;
-               else if (fence->error == -EIO)
-                       rc = -EIO;
+       if (i == MULTI_CS_MAX_USER_CTX) {
+               dev_err(hdev->dev,
+                               "no available multi-CS completion structure\n");
+               return ERR_PTR(-ENOMEM);
+       }
+       return mcs_compl;
+}
 
-               hl_fence_put(fence);
-       } else {
-               dev_dbg(hdev->dev,
-                       "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n",
-                       seq, ctx->cs_sequence);
-               *status = CS_WAIT_STATUS_GONE;
+/*
+ * hl_wait_multi_cs_completion_fini - return completion structure and set as
+ *                                    unused
+ *
+ * @mcs_compl: pointer to the completion structure
+ */
+static void hl_wait_multi_cs_completion_fini(
+                                       struct multi_cs_completion *mcs_compl)
+{
+       /*
+        * Free the completion structure; do it under the lock to stay in
+        * sync with the thread that signals completion.
+        */
+       spin_lock(&mcs_compl->lock);
+       mcs_compl->used = 0;
+       spin_unlock(&mcs_compl->lock);
+}
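hl_wait_multi_cs_completion_init() and its _fini() counterpart claim and return a slot from a small fixed pool, each slot guarded by its own spinlock. A condensed sketch of that claim/release pattern (the pool size and names are illustrative):

#include <linux/completion.h>
#include <linux/spinlock.h>
#include <linux/types.h>

#define DEMO_POOL_SIZE	8

struct demo_slot {
	spinlock_t lock;
	u8 used;
	struct completion done;
};

static struct demo_slot demo_pool[DEMO_POOL_SIZE];

static void demo_pool_init(void)
{
	int i;

	for (i = 0; i < DEMO_POOL_SIZE; i++) {
		spin_lock_init(&demo_pool[i].lock);
		init_completion(&demo_pool[i].done);
	}
}

/* claim the first free slot, or NULL if all slots are busy */
static struct demo_slot *demo_slot_claim(void)
{
	struct demo_slot *slot;
	int i;

	for (i = 0; i < DEMO_POOL_SIZE; i++) {
		slot = &demo_pool[i];

		spin_lock(&slot->lock);
		if (!slot->used) {
			slot->used = 1;
			reinit_completion(&slot->done);
			spin_unlock(&slot->lock);
			return slot;
		}
		spin_unlock(&slot->lock);
	}

	return NULL;
}

/* return the slot; done under the lock to stay in sync with the signaller */
static void demo_slot_release(struct demo_slot *slot)
{
	spin_lock(&slot->lock);
	slot->used = 0;
	spin_unlock(&slot->lock);
}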
+
+/*
+ * hl_wait_multi_cs_completion - wait for first CS to complete
+ *
+ * @mcs_data: multi-CS internal data
+ *
+ * @return 0 on success, otherwise non 0 error code
+ */
+static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data)
+{
+       struct hl_device *hdev = mcs_data->ctx->hdev;
+       struct multi_cs_completion *mcs_compl;
+       long completion_rc;
+
+       mcs_compl = hl_wait_multi_cs_completion_init(hdev,
+                                       mcs_data->stream_master_qid_map);
+       if (IS_ERR(mcs_compl))
+               return PTR_ERR(mcs_compl);
+
+       completion_rc = wait_for_completion_interruptible_timeout(
+                                       &mcs_compl->completion,
+                                       usecs_to_jiffies(mcs_data->timeout_us));
+
+       /* update timestamp */
+       if (completion_rc > 0)
+               mcs_data->timestamp = mcs_compl->timestamp;
+
+       hl_wait_multi_cs_completion_fini(mcs_compl);
+
+       mcs_data->wait_status = completion_rc;
+
+       return 0;
+}
+
+/*
+ * hl_multi_cs_completion_init - init array of multi-CS completion structures
+ *
+ * @hdev: pointer to habanalabs device structure
+ */
+void hl_multi_cs_completion_init(struct hl_device *hdev)
+{
+       struct multi_cs_completion *mcs_cmpl;
+       int i;
+
+       for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) {
+               mcs_cmpl = &hdev->multi_cs_completion[i];
+               mcs_cmpl->used = 0;
+               spin_lock_init(&mcs_cmpl->lock);
+               init_completion(&mcs_cmpl->completion);
+       }
+}
+
+/*
+ * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl
+ *
+ * @hpriv: pointer to the private data of the fd
+ * @data: pointer to multi-CS wait ioctl in/out args
+ *
+ */
+static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct multi_cs_data mcs_data = {0};
+       union hl_wait_cs_args *args = data;
+       struct hl_ctx *ctx = hpriv->ctx;
+       struct hl_fence **fence_arr;
+       void __user *seq_arr;
+       u32 size_to_copy;
+       u64 *cs_seq_arr;
+       u8 seq_arr_len;
+       int rc;
+
+       if (!hdev->supports_wait_for_multi_cs) {
+               dev_err(hdev->dev, "Wait for multi CS is not supported\n");
+               return -EPERM;
+       }
+
+       seq_arr_len = args->in.seq_arr_len;
+
+       if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) {
+               dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n",
+                               HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len);
+               return -EINVAL;
+       }
+
+       /* allocate memory for sequence array */
+       cs_seq_arr =
+               kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL);
+       if (!cs_seq_arr)
+               return -ENOMEM;
+
+       /* copy CS sequence array from user */
+       seq_arr = (void __user *) (uintptr_t) args->in.seq;
+       size_to_copy = seq_arr_len * sizeof(*cs_seq_arr);
+       if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) {
+               dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n");
+               rc = -EFAULT;
+               goto free_seq_arr;
+       }
+
+       /* allocate array for the fences */
+       fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+       if (!fence_arr) {
+               rc = -ENOMEM;
+               goto free_seq_arr;
+       }
+
+       /* initialize the multi-CS internal data */
+       mcs_data.ctx = ctx;
+       mcs_data.seq_arr = cs_seq_arr;
+       mcs_data.fence_arr = fence_arr;
+       mcs_data.arr_len = seq_arr_len;
+
+       hl_ctx_get(hdev, ctx);
+
+       /* poll all CS fences, extract timestamp */
+       mcs_data.update_ts = true;
+       rc = hl_cs_poll_fences(&mcs_data);
+       /*
+        * Skip waiting for CS completion when any of the following is true:
+        * - the poll function returned an error
+        * - one or more CSs in the list already completed
+        * - the user called the ioctl with timeout 0
+        */
+       if (rc || mcs_data.completion_bitmap || !args->in.timeout_us)
+               goto put_ctx;
+
+       /* wait (with timeout) for the first CS to be completed */
+       mcs_data.timeout_us = args->in.timeout_us;
+       rc = hl_wait_multi_cs_completion(&mcs_data);
+       if (rc)
+               goto put_ctx;
+
+       if (mcs_data.wait_status > 0) {
+               /*
+                * poll fences once again to update the CS map.
+                * no timestamp should be updated this time.
+                */
+               mcs_data.update_ts = false;
+               rc = hl_cs_poll_fences(&mcs_data);
+
+               /*
+                * if hl_wait_multi_cs_completion returned before timeout (i.e.
+                * it got a completion) we expect to see at least one CS
+                * completed after the poll function.
+                */
+               if (!mcs_data.completion_bitmap) {
+                       dev_err(hdev->dev, "Multi-CS got completion on wait but no CS completed\n");
+                       rc = -EFAULT;
+               }
        }
 
+put_ctx:
        hl_ctx_put(ctx);
+       kfree(fence_arr);
 
-       return rc;
+free_seq_arr:
+       kfree(cs_seq_arr);
+
+       /* update output args */
+       memset(args, 0, sizeof(*args));
+       if (rc)
+               return rc;
+
+       if (mcs_data.completion_bitmap) {
+               args->out.status = HL_WAIT_CS_STATUS_COMPLETED;
+               args->out.cs_completion_map = mcs_data.completion_bitmap;
+
+               /* if the timestamp is not 0, it's valid */
+               if (mcs_data.timestamp) {
+                       args->out.timestamp_nsec = mcs_data.timestamp;
+                       args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD;
+               }
+
+               /* update if some CS was gone */
+               if (mcs_data.gone_cs)
+                       args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE;
+       } else if (mcs_data.wait_status == -ERESTARTSYS) {
+               args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
+       } else {
+               args->out.status = HL_WAIT_CS_STATUS_BUSY;
+       }
+
+       return 0;
 }
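From user space, the multi-CS wait is driven through the wait-CS ioctl with the HL_WAIT_CS_FLAGS_MULTI_CS flag and the in/out fields shown in this patch. A rough caller sketch follows; the header path, the HL_IOCTL_WAIT_CS macro and the device fd (e.g. an open /dev/hl0) come from the existing habanalabs uAPI and are assumptions, not part of this diff:

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <misc/habanalabs.h>	/* assumed installed uAPI header */

/* Wait until any of @seqs completes; returns the completion bitmap,
 * 0 if nothing completed, or -1 on ioctl failure.
 */
static long wait_any_cs(int fd, const uint64_t *seqs, uint8_t len,
			uint64_t timeout_us)
{
	union hl_wait_cs_args args;

	memset(&args, 0, sizeof(args));
	args.in.seq = (uint64_t) (uintptr_t) seqs;	/* pointer to sequence array */
	args.in.seq_arr_len = len;
	args.in.timeout_us = timeout_us;
	args.in.flags = HL_WAIT_CS_FLAGS_MULTI_CS;

	if (ioctl(fd, HL_IOCTL_WAIT_CS, &args))
		return -1;

	if (args.out.status != HL_WAIT_CS_STATUS_COMPLETED)
		return 0;	/* busy or interrupted */

	return (long) args.out.cs_completion_map;
}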
 
 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
@@ -2015,9 +2715,9 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
 {
        struct hl_user_pending_interrupt *pend;
        struct hl_user_interrupt *interrupt;
-       unsigned long timeout;
-       long completion_rc;
+       unsigned long timeout, flags;
        u32 completion_value;
+       long completion_rc;
        int rc = 0;
 
        if (timeout_us == U32_MAX)
@@ -2040,17 +2740,10 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
        else
                interrupt = &hdev->user_interrupt[interrupt_offset];
 
-       spin_lock(&interrupt->wait_list_lock);
-       if (!hl_device_operational(hdev, NULL)) {
-               rc = -EPERM;
-               goto unlock_and_free_fence;
-       }
-
        if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
-               dev_err(hdev->dev,
-                       "Failed to copy completion value from user\n");
+               dev_err(hdev->dev, "Failed to copy completion value from user\n");
                rc = -EFAULT;
-               goto unlock_and_free_fence;
+               goto free_fence;
        }
 
        if (completion_value >= target_value)
@@ -2059,48 +2752,57 @@ static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx,
                *status = CS_WAIT_STATUS_BUSY;
 
        if (!timeout_us || (*status == CS_WAIT_STATUS_COMPLETED))
-               goto unlock_and_free_fence;
+               goto free_fence;
 
        /* Add pending user interrupt to relevant list for the interrupt
         * handler to monitor
         */
+       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_add_tail(&pend->wait_list_node, &interrupt->wait_list_head);
-       spin_unlock(&interrupt->wait_list_lock);
+       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
 wait_again:
        /* Wait for interrupt handler to signal completion */
-       completion_rc =
-               wait_for_completion_interruptible_timeout(
-                               &pend->fence.completion, timeout);
+       completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion,
+                                                                               timeout);
 
        /* If timeout did not expire we need to perform the comparison.
         * If comparison fails, keep waiting until timeout expires
         */
        if (completion_rc > 0) {
-               if (copy_from_user(&completion_value,
-                               u64_to_user_ptr(user_address), 4)) {
-                       dev_err(hdev->dev,
-                               "Failed to copy completion value from user\n");
+               if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 4)) {
+                       dev_err(hdev->dev, "Failed to copy completion value from user\n");
                        rc = -EFAULT;
+
                        goto remove_pending_user_interrupt;
                }
 
                if (completion_value >= target_value) {
                        *status = CS_WAIT_STATUS_COMPLETED;
                } else {
+                       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
+                       reinit_completion(&pend->fence.completion);
                        timeout = completion_rc;
+
+                       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
                        goto wait_again;
                }
+       } else if (completion_rc == -ERESTARTSYS) {
+               dev_err_ratelimited(hdev->dev,
+                       "user process got signal while waiting for interrupt ID %d\n",
+                       interrupt->interrupt_id);
+               *status = HL_WAIT_CS_STATUS_INTERRUPTED;
+               rc = -EINTR;
        } else {
                *status = CS_WAIT_STATUS_BUSY;
        }
 
 remove_pending_user_interrupt:
-       spin_lock(&interrupt->wait_list_lock);
+       spin_lock_irqsave(&interrupt->wait_list_lock, flags);
        list_del(&pend->wait_list_node);
+       spin_unlock_irqrestore(&interrupt->wait_list_lock, flags);
 
-unlock_and_free_fence:
-       spin_unlock(&interrupt->wait_list_lock);
+free_fence:
        kfree(pend);
        hl_ctx_put(ctx);
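
The wait_again loop above leans on the return convention of wait_for_completion_interruptible_timeout(): a positive return is the remaining timeout in jiffies (reused here so a premature wakeup does not extend the total wait), zero means the timeout expired, and -ERESTARTSYS means the task caught a signal. A self-contained sketch of that retry pattern, with hypothetical names:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/types.h>

/* returns 0 if cond() became true, -ETIMEDOUT or -EINTR otherwise */
static int example_wait_until_cond(struct completion *comp,
                                   bool (*cond)(void *), void *arg,
                                   unsigned long timeout_jiffies)
{
        long rc;

        while (1) {
                rc = wait_for_completion_interruptible_timeout(comp,
                                                        timeout_jiffies);
                if (rc > 0) {
                        if (cond(arg))
                                return 0;
                        /* woken up but condition not met yet: keep the
                         * remaining budget and wait again
                         */
                        reinit_completion(comp);
                        timeout_jiffies = rc;
                        continue;
                }
                return rc ? -EINTR : -ETIMEDOUT; /* -ERESTARTSYS or 0 */
        }
}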
 
@@ -2148,8 +2850,9 @@ static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        memset(args, 0, sizeof(*args));
 
        if (rc) {
-               dev_err_ratelimited(hdev->dev,
-                       "interrupt_wait_ioctl failed (%d)\n", rc);
+               if (rc != -EINTR)
+                       dev_err_ratelimited(hdev->dev,
+                               "interrupt_wait_ioctl failed (%d)\n", rc);
 
                return rc;
        }
@@ -2173,8 +2876,16 @@ int hl_wait_ioctl(struct hl_fpriv *hpriv, void *data)
        u32 flags = args->in.flags;
        int rc;
 
+       /* If the device is not operational, no point in waiting for any command submission or
+        * user interrupt
+        */
+       if (!hl_device_operational(hpriv->hdev, NULL))
+               return -EPERM;
+
        if (flags & HL_WAIT_CS_FLAGS_INTERRUPT)
                rc = hl_interrupt_wait_ioctl(hpriv, data);
+       else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS)
+               rc = hl_multi_cs_wait_ioctl(hpriv, data);
        else
                rc = hl_cs_wait_ioctl(hpriv, data);
 
index 19b6b04..2297830 100644 (file)
@@ -9,16 +9,70 @@
 
 #include <linux/slab.h>
 
+void hl_encaps_handle_do_release(struct kref *ref)
+{
+       struct hl_cs_encaps_sig_handle *handle =
+               container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+       struct hl_ctx *ctx = handle->hdev->compute_ctx;
+       struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+       spin_lock(&mgr->lock);
+       idr_remove(&mgr->handles, handle->id);
+       spin_unlock(&mgr->lock);
+
+       kfree(handle);
+}
+
+static void hl_encaps_handle_do_release_sob(struct kref *ref)
+{
+       struct hl_cs_encaps_sig_handle *handle =
+               container_of(ref, struct hl_cs_encaps_sig_handle, refcount);
+       struct hl_ctx *ctx = handle->hdev->compute_ctx;
+       struct hl_encaps_signals_mgr *mgr = &ctx->sig_mgr;
+
+       /* If we got here, signals were reserved but a CS with encapsulated
+        * signals was never submitted, so we need to drop the refcount taken
+        * on the hw_sob at reservation time.
+        */
+       hw_sob_put(handle->hw_sob);
+
+       spin_lock(&mgr->lock);
+       idr_remove(&mgr->handles, handle->id);
+       spin_unlock(&mgr->lock);
+
+       kfree(handle);
+}
+
+static void hl_encaps_sig_mgr_init(struct hl_encaps_signals_mgr *mgr)
+{
+       spin_lock_init(&mgr->lock);
+       idr_init(&mgr->handles);
+}
+
+static void hl_encaps_sig_mgr_fini(struct hl_device *hdev,
+                       struct hl_encaps_signals_mgr *mgr)
+{
+       struct hl_cs_encaps_sig_handle *handle;
+       struct idr *idp;
+       u32 id;
+
+       idp = &mgr->handles;
+
+       if (!idr_is_empty(idp)) {
+               dev_warn(hdev->dev, "device released while some encaps signals handles are still allocated\n");
+               idr_for_each_entry(idp, handle, id)
+                       kref_put(&handle->refcount,
+                                       hl_encaps_handle_do_release_sob);
+       }
+
+       idr_destroy(&mgr->handles);
+}
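
The manager above pairs an idr with per-handle krefs: handles are looked up and removed under mgr->lock, and the kref release callbacks take care of unlinking and freeing. A sketch of what the allocation side of such a handle manager could look like; the function is hypothetical, the real reservation path lives in the command submission code:

#include "habanalabs.h"

/* hypothetical allocation-side counterpart, for illustration only */
static int example_encaps_handle_create(struct hl_encaps_signals_mgr *mgr,
                                        struct hl_cs_encaps_sig_handle *handle)
{
        int id;

        kref_init(&handle->refcount);

        spin_lock(&mgr->lock);
        /* GFP_ATOMIC: we are inside a spinlock */
        id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC);
        spin_unlock(&mgr->lock);

        if (id < 0)
                return id;

        handle->id = id;
        return 0;
}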
+
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
        struct hl_device *hdev = ctx->hdev;
        int i;
 
-       /* Release all allocated pending cb's, those cb's were never
-        * scheduled so it is safe to release them here
-        */
-       hl_pending_cb_list_flush(ctx);
-
        /* Release all allocated HW block mapped list entries and destroy
         * the mutex.
         */
@@ -53,6 +107,7 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
                hl_cb_va_pool_fini(ctx);
                hl_vm_ctx_fini(ctx);
                hl_asid_free(hdev, ctx->asid);
+               hl_encaps_sig_mgr_fini(hdev, &ctx->sig_mgr);
 
                /* Scrub both SRAM and DRAM */
                hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
@@ -130,9 +185,6 @@ void hl_ctx_free(struct hl_device *hdev, struct hl_ctx *ctx)
 {
        if (kref_put(&ctx->refcount, hl_ctx_do_release) == 1)
                return;
-
-       dev_warn(hdev->dev,
-               "user process released device but its command submissions are still executing\n");
 }
 
 int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
@@ -144,11 +196,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
        kref_init(&ctx->refcount);
 
        ctx->cs_sequence = 1;
-       INIT_LIST_HEAD(&ctx->pending_cb_list);
-       spin_lock_init(&ctx->pending_cb_lock);
        spin_lock_init(&ctx->cs_lock);
        atomic_set(&ctx->thread_ctx_switch_token, 1);
-       atomic_set(&ctx->thread_pending_cb_token, 1);
        ctx->thread_ctx_switch_wait_token = 0;
        ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
                                sizeof(struct hl_fence *),
@@ -200,6 +249,8 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
                        goto err_cb_va_pool_fini;
                }
 
+               hl_encaps_sig_mgr_init(&ctx->sig_mgr);
+
                dev_dbg(hdev->dev, "create user context %d\n", ctx->asid);
        }
 
@@ -229,31 +280,86 @@ int hl_ctx_put(struct hl_ctx *ctx)
        return kref_put(&ctx->refcount, hl_ctx_do_release);
 }
 
-struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+/*
+ * hl_ctx_get_fence_locked - get CS fence under CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq: CS sequence number
+ *
+ * @return valid fence pointer on success, NULL if fence is gone, otherwise
+ *         error pointer.
+ *
+ * NOTE: this function must be called with the cs_lock held
+ */
+static struct hl_fence *hl_ctx_get_fence_locked(struct hl_ctx *ctx, u64 seq)
 {
        struct asic_fixed_properties *asic_prop = &ctx->hdev->asic_prop;
        struct hl_fence *fence;
 
-       spin_lock(&ctx->cs_lock);
-
-       if (seq >= ctx->cs_sequence) {
-               spin_unlock(&ctx->cs_lock);
+       if (seq >= ctx->cs_sequence)
                return ERR_PTR(-EINVAL);
-       }
 
-       if (seq + asic_prop->max_pending_cs < ctx->cs_sequence) {
-               spin_unlock(&ctx->cs_lock);
+       if (seq + asic_prop->max_pending_cs < ctx->cs_sequence)
                return NULL;
-       }
 
        fence = ctx->cs_pending[seq & (asic_prop->max_pending_cs - 1)];
        hl_fence_get(fence);
+       return fence;
+}
+
+struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq)
+{
+       struct hl_fence *fence;
+
+       spin_lock(&ctx->cs_lock);
+
+       fence = hl_ctx_get_fence_locked(ctx, seq);
 
        spin_unlock(&ctx->cs_lock);
 
        return fence;
 }
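
hl_ctx_get_fence_locked() distinguishes three outcomes: ERR_PTR(-EINVAL) for a sequence that was never submitted, NULL for a CS that already aged out of the pending array, and a valid fence with a reference taken. Callers therefore test IS_ERR() before testing for NULL; a hypothetical caller for illustration:

#include <linux/err.h>

#include "habanalabs.h"

/* hypothetical caller, illustrating the three-way return convention */
static int example_check_fence(struct hl_ctx *ctx, u64 seq)
{
        struct hl_fence *fence;

        fence = hl_ctx_get_fence(ctx, seq);
        if (IS_ERR(fence))
                return PTR_ERR(fence);          /* seq was never submitted */

        if (!fence)
                return 0;                       /* CS long completed and gone */

        /* ... wait on or inspect the fence here ... */

        hl_fence_put(fence);
        return 0;
}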
 
+/*
+ * hl_ctx_get_fences - get multiple CS fences under the same CS lock
+ *
+ * @ctx: pointer to the context structure.
+ * @seq_arr: array of CS sequences to wait for
+ * @fence: fence array to store the CS fences
+ * @arr_len: length of the seq_arr and fence arrays
+ *
+ * @return 0 on success, otherwise a non-zero error code
+ */
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+                               struct hl_fence **fence, u32 arr_len)
+{
+       struct hl_fence **fence_arr_base = fence;
+       int i, rc = 0;
+
+       spin_lock(&ctx->cs_lock);
+
+       for (i = 0; i < arr_len; i++, fence++) {
+               u64 seq = seq_arr[i];
+
+               *fence = hl_ctx_get_fence_locked(ctx, seq);
+
+               if (IS_ERR(*fence)) {
+                       dev_err(ctx->hdev->dev,
+                               "Failed to get fence for CS with seq 0x%llx\n",
+                                       seq);
+                       rc = PTR_ERR(*fence);
+                       break;
+               }
+       }
+
+       spin_unlock(&ctx->cs_lock);
+
+       if (rc)
+               hl_fences_put(fence_arr_base, i);
+
+       return rc;
+}
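
On a mid-loop failure the function drops only the i references it already took via hl_fences_put(); the failing slot itself holds an error pointer and is not put. A hypothetical caller, showing that on success the whole array must eventually be released the same way:

#include <linux/slab.h>

#include "habanalabs.h"

/* hypothetical caller: on success every entry (including NULLs for gone
 * CS) is eventually released through hl_fences_put()
 */
static int example_snapshot_fences(struct hl_ctx *ctx, u64 *seq_arr, u32 n)
{
        struct hl_fence **fence_arr;
        int rc;

        fence_arr = kcalloc(n, sizeof(*fence_arr), GFP_KERNEL);
        if (!fence_arr)
                return -ENOMEM;

        rc = hl_ctx_get_fences(ctx, seq_arr, fence_arr, n);
        if (!rc) {
                /* ... inspect fence_arr[i] here ... */
                hl_fences_put(fence_arr, n);
        }

        kfree(fence_arr);
        return rc;
}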
+
 /*
  * hl_ctx_mgr_init - initialize the context manager
  *
index 703d79f..985f1f3 100644 (file)
@@ -209,12 +209,12 @@ static int userptr_show(struct seq_file *s, void *data)
                if (first) {
                        first = false;
                        seq_puts(s, "\n");
-                       seq_puts(s, " user virtual address     size             dma dir\n");
+                       seq_puts(s, " pid      user virtual address     size             dma dir\n");
                        seq_puts(s, "----------------------------------------------------------\n");
                }
-               seq_printf(s,
-                       "    0x%-14llx      %-10u    %-30s\n",
-                       userptr->addr, userptr->size, dma_dir[userptr->dir]);
+               seq_printf(s, " %-7d  0x%-14llx      %-10llu    %-30s\n",
+                               userptr->pid, userptr->addr, userptr->size,
+                               dma_dir[userptr->dir]);
        }
 
        spin_unlock(&dev_entry->userptr_spinlock);
@@ -235,7 +235,7 @@ static int vm_show(struct seq_file *s, void *data)
        struct hl_vm_hash_node *hnode;
        struct hl_userptr *userptr;
        struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
-       enum vm_type_t *vm_type;
+       enum vm_type *vm_type;
        bool once = true;
        u64 j;
        int i;
@@ -261,7 +261,7 @@ static int vm_show(struct seq_file *s, void *data)
                        if (*vm_type == VM_TYPE_USERPTR) {
                                userptr = hnode->ptr;
                                seq_printf(s,
-                                       "    0x%-14llx      %-10u\n",
+                                       "    0x%-14llx      %-10llu\n",
                                        hnode->vaddr, userptr->size);
                        } else {
                                phys_pg_pack = hnode->ptr;
@@ -320,6 +320,77 @@ static int vm_show(struct seq_file *s, void *data)
        return 0;
 }
 
+static int userptr_lookup_show(struct seq_file *s, void *data)
+{
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       struct scatterlist *sg;
+       struct hl_userptr *userptr;
+       bool first = true;
+       u64 total_npages, npages, sg_start, sg_end;
+       dma_addr_t dma_addr;
+       int i;
+
+       spin_lock(&dev_entry->userptr_spinlock);
+
+       list_for_each_entry(userptr, &dev_entry->userptr_list, debugfs_list) {
+               if (dev_entry->userptr_lookup >= userptr->addr &&
+               dev_entry->userptr_lookup < userptr->addr + userptr->size) {
+                       total_npages = 0;
+                       for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents,
+                                       i) {
+                               npages = hl_get_sg_info(sg, &dma_addr);
+                               sg_start = userptr->addr +
+                                       total_npages * PAGE_SIZE;
+                               sg_end = userptr->addr +
+                                       (total_npages + npages) * PAGE_SIZE;
+
+                               if (dev_entry->userptr_lookup >= sg_start &&
+                                   dev_entry->userptr_lookup < sg_end) {
+                                       dma_addr += (dev_entry->userptr_lookup -
+                                                       sg_start);
+                                       if (first) {
+                                               first = false;
+                                               seq_puts(s, "\n");
+                                               seq_puts(s, " user virtual address         dma address       pid        region start     region size\n");
+                                               seq_puts(s, "---------------------------------------------------------------------------------------\n");
+                                       }
+                                       seq_printf(s, " 0x%-18llx  0x%-16llx  %-8u  0x%-16llx %-12llu\n",
+                                               dev_entry->userptr_lookup,
+                                               (u64)dma_addr, userptr->pid,
+                                               userptr->addr, userptr->size);
+                               }
+                               total_npages += npages;
+                       }
+               }
+       }
+
+       spin_unlock(&dev_entry->userptr_spinlock);
+
+       if (!first)
+               seq_puts(s, "\n");
+
+       return 0;
+}
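
The lookup above walks the pinned range's scatter-gather table, accumulating page counts until it finds the segment that covers the requested virtual address, then offsets into that segment's DMA address. A simplified sketch of the same translation using DMA segment lengths instead of the driver's page-count helper; names are illustrative and the sg_table is assumed to be already DMA-mapped:

#include <linux/scatterlist.h>
#include <linux/types.h>

/* translate a virtual address inside a pinned range to a DMA address */
static dma_addr_t example_va_to_dma(struct sg_table *sgt, u64 base_va, u64 va)
{
        struct scatterlist *sg;
        u64 offset = va - base_va;
        int i;

        for_each_sgtable_dma_sg(sgt, sg, i) {
                if (offset < sg_dma_len(sg))
                        return sg_dma_address(sg) + offset;
                offset -= sg_dma_len(sg);
        }

        return 0;       /* va not inside this mapping */
}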
+
+static ssize_t userptr_lookup_write(struct file *file, const char __user *buf,
+               size_t count, loff_t *f_pos)
+{
+       struct seq_file *s = file->private_data;
+       struct hl_debugfs_entry *entry = s->private;
+       struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
+       ssize_t rc;
+       u64 value;
+
+       rc = kstrtoull_from_user(buf, count, 16, &value);
+       if (rc)
+               return rc;
+
+       dev_entry->userptr_lookup = value;
+
+       return count;
+}
+
 static int mmu_show(struct seq_file *s, void *data)
 {
        struct hl_debugfs_entry *entry = s->private;
@@ -349,7 +420,7 @@ static int mmu_show(struct seq_file *s, void *data)
                return 0;
        }
 
-       phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+       hl_mmu_va_to_pa(ctx, virt_addr, &phys_addr);
 
        if (hops_info.scrambled_vaddr &&
                (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
@@ -491,11 +562,10 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u32 size,
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        struct hl_ctx *ctx = hdev->compute_ctx;
        struct hl_vm_hash_node *hnode;
+       u64 end_address, range_size;
        struct hl_userptr *userptr;
-       enum vm_type_t *vm_type;
+       enum vm_type *vm_type;
        bool valid = false;
-       u64 end_address;
-       u32 range_size;
        int i, rc = 0;
 
        if (!ctx) {
@@ -1043,6 +1113,60 @@ static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
        return 0;
 }
 
+static ssize_t hl_state_dump_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       ssize_t rc;
+
+       down_read(&entry->state_dump_sem);
+       if (!entry->state_dump[entry->state_dump_head])
+               rc = 0;
+       else
+               rc = simple_read_from_buffer(
+                       buf, count, ppos,
+                       entry->state_dump[entry->state_dump_head],
+                       strlen(entry->state_dump[entry->state_dump_head]));
+       up_read(&entry->state_dump_sem);
+
+       return rc;
+}
+
+static ssize_t hl_state_dump_write(struct file *f, const char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+       ssize_t rc;
+       u32 size;
+       int i;
+
+       rc = kstrtouint_from_user(buf, count, 10, &size);
+       if (rc)
+               return rc;
+
+       if (size <= 0 || size >= ARRAY_SIZE(entry->state_dump)) {
+               dev_err(hdev->dev, "Invalid number of dumps to skip\n");
+               return -EINVAL;
+       }
+
+       if (entry->state_dump[entry->state_dump_head]) {
+               down_write(&entry->state_dump_sem);
+               for (i = 0; i < size; ++i) {
+                       vfree(entry->state_dump[entry->state_dump_head]);
+                       entry->state_dump[entry->state_dump_head] = NULL;
+                       if (entry->state_dump_head > 0)
+                               entry->state_dump_head--;
+                       else
+                               entry->state_dump_head =
+                                       ARRAY_SIZE(entry->state_dump) - 1;
+               }
+               up_write(&entry->state_dump_sem);
+       }
+
+       return count;
+}
+
 static const struct file_operations hl_data32b_fops = {
        .owner = THIS_MODULE,
        .read = hl_data_read32,
@@ -1110,12 +1234,19 @@ static const struct file_operations hl_security_violations_fops = {
        .read = hl_security_violations_read
 };
 
+static const struct file_operations hl_state_dump_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_state_dump_read,
+       .write = hl_state_dump_write
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
        {"command_buffers", command_buffers_show, NULL},
        {"command_submission", command_submission_show, NULL},
        {"command_submission_jobs", command_submission_jobs_show, NULL},
        {"userptr", userptr_show, NULL},
        {"vm", vm_show, NULL},
+       {"userptr_lookup", userptr_lookup_show, userptr_lookup_write},
        {"mmu", mmu_show, mmu_asid_va_write},
        {"engines", engines_show, NULL}
 };
@@ -1172,6 +1303,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
        INIT_LIST_HEAD(&dev_entry->userptr_list);
        INIT_LIST_HEAD(&dev_entry->ctx_mem_hash_list);
        mutex_init(&dev_entry->file_mutex);
+       init_rwsem(&dev_entry->state_dump_sem);
        spin_lock_init(&dev_entry->cb_spinlock);
        spin_lock_init(&dev_entry->cs_spinlock);
        spin_lock_init(&dev_entry->cs_job_spinlock);
@@ -1283,6 +1415,12 @@ void hl_debugfs_add_device(struct hl_device *hdev)
                                dev_entry->root,
                                &hdev->skip_reset_on_timeout);
 
+       debugfs_create_file("state_dump",
+                               0600,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_state_dump_fops);
+
        for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
                debugfs_create_file(hl_debugfs_list[i].name,
                                        0444,
@@ -1297,6 +1435,7 @@ void hl_debugfs_add_device(struct hl_device *hdev)
 void hl_debugfs_remove_device(struct hl_device *hdev)
 {
        struct hl_dbg_device_entry *entry = &hdev->hl_debugfs;
+       int i;
 
        debugfs_remove_recursive(entry->root);
 
@@ -1304,6 +1443,9 @@ void hl_debugfs_remove_device(struct hl_device *hdev)
 
        vfree(entry->blob_desc.data);
 
+       for (i = 0; i < ARRAY_SIZE(entry->state_dump); ++i)
+               vfree(entry->state_dump[i]);
+
        kfree(entry->entry_arr);
 }
 
@@ -1416,6 +1558,28 @@ void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx)
        spin_unlock(&dev_entry->ctx_mem_hash_spinlock);
 }
 
+/**
+ * hl_debugfs_set_state_dump - register state dump making it accessible via
+ *                             debugfs
+ * @hdev: pointer to the device structure
+ * @data: the actual dump data
+ * @length: the length of the data
+ */
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+                                       unsigned long length)
+{
+       struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
+
+       down_write(&dev_entry->state_dump_sem);
+
+       dev_entry->state_dump_head = (dev_entry->state_dump_head + 1) %
+                                       ARRAY_SIZE(dev_entry->state_dump);
+       vfree(dev_entry->state_dump[dev_entry->state_dump_head]);
+       dev_entry->state_dump[dev_entry->state_dump_head] = data;
+
+       up_write(&dev_entry->state_dump_sem);
+}
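
State dumps live in a small ring of HL_STATE_DUMP_HIST_LEN entries: the producer above advances state_dump_head and replaces that slot under the write side of state_dump_sem, while the debugfs handlers step backwards through the ring to serve or discard the newest entries. The wrap-around arithmetic, spelled out as an illustrative sketch:

#define EXAMPLE_HIST_LEN 5      /* stands in for HL_STATE_DUMP_HIST_LEN */

/* slot that will hold the newest entry */
static inline unsigned int example_ring_next(unsigned int head)
{
        return (head + 1) % EXAMPLE_HIST_LEN;
}

/* step back towards older entries, wrapping around */
static inline unsigned int example_ring_prev(unsigned int head)
{
        return head ? head - 1 : EXAMPLE_HIST_LEN - 1;
}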
+
 void __init hl_debugfs_init(void)
 {
        hl_debug_root = debugfs_create_dir("habanalabs", NULL);
index ff4cbde..97c7c86 100644 (file)
@@ -7,11 +7,11 @@
 
 #define pr_fmt(fmt)                    "habanalabs: " fmt
 
+#include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
 #include <linux/pci.h>
 #include <linux/hwmon.h>
-#include <uapi/misc/habanalabs.h>
 
 enum hl_device_status hl_device_status(struct hl_device *hdev)
 {
@@ -23,6 +23,8 @@ enum hl_device_status hl_device_status(struct hl_device *hdev)
                status = HL_DEVICE_STATUS_NEEDS_RESET;
        else if (hdev->disabled)
                status = HL_DEVICE_STATUS_MALFUNCTION;
+       else if (!hdev->init_done)
+               status = HL_DEVICE_STATUS_IN_DEVICE_CREATION;
        else
                status = HL_DEVICE_STATUS_OPERATIONAL;
 
@@ -44,6 +46,7 @@ bool hl_device_operational(struct hl_device *hdev,
        case HL_DEVICE_STATUS_NEEDS_RESET:
                return false;
        case HL_DEVICE_STATUS_OPERATIONAL:
+       case HL_DEVICE_STATUS_IN_DEVICE_CREATION:
        default:
                return true;
        }
@@ -129,8 +132,8 @@ static int hl_device_release(struct inode *inode, struct file *filp)
        hl_ctx_mgr_fini(hdev, &hpriv->ctx_mgr);
 
        if (!hl_hpriv_put(hpriv))
-               dev_warn(hdev->dev,
-                       "Device is still in use because there are live CS and/or memory mappings\n");
+               dev_notice(hdev->dev,
+                       "User process closed FD but device still in use\n");
 
        hdev->last_open_session_duration_jif =
                jiffies - hdev->last_successful_open_jif;
@@ -308,9 +311,15 @@ static void device_hard_reset_pending(struct work_struct *work)
                container_of(work, struct hl_device_reset_work,
                                reset_work.work);
        struct hl_device *hdev = device_reset_work->hdev;
+       u32 flags;
        int rc;
 
-       rc = hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD);
+       flags = HL_RESET_HARD | HL_RESET_FROM_RESET_THREAD;
+
+       if (device_reset_work->fw_reset)
+               flags |= HL_RESET_FW;
+
+       rc = hl_device_reset(hdev, flags);
        if ((rc == -EBUSY) && !hdev->device_fini_pending) {
                dev_info(hdev->dev,
                        "Could not reset device. will try again in %u seconds",
@@ -682,6 +691,44 @@ out:
        return rc;
 }
 
+static void take_release_locks(struct hl_device *hdev)
+{
+       /* Flush anyone that is inside the critical section of enqueue
+        * jobs to the H/W
+        */
+       hdev->asic_funcs->hw_queues_lock(hdev);
+       hdev->asic_funcs->hw_queues_unlock(hdev);
+
+       /* Flush processes that are sending message to CPU */
+       mutex_lock(&hdev->send_cpu_message_lock);
+       mutex_unlock(&hdev->send_cpu_message_lock);
+
+       /* Flush anyone that is inside device open */
+       mutex_lock(&hdev->fpriv_list_lock);
+       mutex_unlock(&hdev->fpriv_list_lock);
+}
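
take_release_locks() uses the lock-then-unlock idiom as a flush barrier: once hdev->disabled (or the reset indication) is set, acquiring and immediately releasing each lock guarantees that anyone who entered the corresponding critical section before the flag was set has already left it, while later entrants will observe the flag. A minimal sketch of the idiom with a hypothetical flag and mutex:

#include <linux/errno.h>
#include <linux/mutex.h>
#include <linux/types.h>

/* after setting the flag, a lock/unlock pair waits out current holders */
static void example_disable_submissions(struct mutex *lock, bool *disabled)
{
        *disabled = true;

        mutex_lock(lock);       /* blocks until current holders leave */
        mutex_unlock(lock);
}

static int example_submit(struct mutex *lock, bool *disabled)
{
        int rc = 0;

        mutex_lock(lock);
        if (*disabled)
                rc = -EBUSY;    /* new work is refused once disabled */
        /* else: enqueue work to the H/W here */
        mutex_unlock(lock);

        return rc;
}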
+
+static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_reset)
+{
+       if (hard_reset)
+               device_late_fini(hdev);
+
+       /*
+        * Halt the engines and disable interrupts so we won't get any more
+        * completions from H/W and we won't have any accesses from the
+        * H/W to the host machine
+        */
+       hdev->asic_funcs->halt_engines(hdev, hard_reset, fw_reset);
+
+       /* Go over all the queues, release all CS and their jobs */
+       hl_cs_rollback_all(hdev);
+
+       /* Release all pending user interrupts, each pending user interrupt
+        * holds a reference to user context
+        */
+       hl_release_pending_user_interrupts(hdev);
+}
+
 /*
  * hl_device_suspend - initiate device suspend
  *
@@ -707,16 +754,7 @@ int hl_device_suspend(struct hl_device *hdev)
        /* This blocks all other stuff that is not blocked by in_reset */
        hdev->disabled = true;
 
-       /*
-        * Flush anyone that is inside the critical section of enqueue
-        * jobs to the H/W
-        */
-       hdev->asic_funcs->hw_queues_lock(hdev);
-       hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       /* Flush processes that are sending message to CPU */
-       mutex_lock(&hdev->send_cpu_message_lock);
-       mutex_unlock(&hdev->send_cpu_message_lock);
+       take_release_locks(hdev);
 
        rc = hdev->asic_funcs->suspend(hdev);
        if (rc)
@@ -819,6 +857,11 @@ static int device_kill_open_processes(struct hl_device *hdev, u32 timeout)
                        usleep_range(1000, 10000);
 
                        put_task_struct(task);
+               } else {
+                       dev_warn(hdev->dev,
+                               "Can't get task struct for PID so giving up on killing process\n");
+                       mutex_unlock(&hdev->fpriv_list_lock);
+                       return -ETIME;
                }
        }
 
@@ -885,7 +928,7 @@ static void device_disable_open_processes(struct hl_device *hdev)
 int hl_device_reset(struct hl_device *hdev, u32 flags)
 {
        u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
-       bool hard_reset, from_hard_reset_thread, hard_instead_soft = false;
+       bool hard_reset, from_hard_reset_thread, fw_reset, hard_instead_soft = false;
        int i, rc;
 
        if (!hdev->init_done) {
@@ -894,8 +937,9 @@ int hl_device_reset(struct hl_device *hdev, u32 flags)
                return 0;
        }
 
-       hard_reset = (flags & HL_RESET_HARD) != 0;
-       from_hard_reset_thread = (flags & HL_RESET_FROM_RESET_THREAD) != 0;
+       hard_reset = !!(flags & HL_RESET_HARD);
+       from_hard_reset_thread = !!(flags & HL_RESET_FROM_RESET_THREAD);
+       fw_reset = !!(flags & HL_RESET_FW);
 
        if (!hard_reset && !hdev->supports_soft_reset) {
                hard_instead_soft = true;
@@ -947,11 +991,13 @@ do_reset:
                else
                        hdev->curr_reset_cause = HL_RESET_CAUSE_UNKNOWN;
 
-               /*
-                * if reset is due to heartbeat, device CPU is no responsive in
-                * which case no point sending PCI disable message to it
+               /* If reset is due to heartbeat, device CPU is not responsive in
+                * which case no point sending PCI disable message to it.
+                *
+                * If F/W is performing the reset, no need to send it a message to disable
+                * PCI access
                 */
-               if (hard_reset && !(flags & HL_RESET_HEARTBEAT)) {
+               if (hard_reset && !(flags & (HL_RESET_HEARTBEAT | HL_RESET_FW))) {
                        /* Disable PCI access from device F/W so he won't send
                         * us additional interrupts. We disable MSI/MSI-X at
                         * the halt_engines function and we can't have the F/W
@@ -970,15 +1016,7 @@ do_reset:
                /* This also blocks future CS/VM/JOB completion operations */
                hdev->disabled = true;
 
-               /* Flush anyone that is inside the critical section of enqueue
-                * jobs to the H/W
-                */
-               hdev->asic_funcs->hw_queues_lock(hdev);
-               hdev->asic_funcs->hw_queues_unlock(hdev);
-
-               /* Flush anyone that is inside device open */
-               mutex_lock(&hdev->fpriv_list_lock);
-               mutex_unlock(&hdev->fpriv_list_lock);
+               take_release_locks(hdev);
 
                dev_err(hdev->dev, "Going to RESET device!\n");
        }
@@ -989,6 +1027,8 @@ again:
 
                hdev->process_kill_trial_cnt = 0;
 
+               hdev->device_reset_work.fw_reset = fw_reset;
+
                /*
                 * Because the reset function can't run from heartbeat work,
                 * we need to call the reset function from a dedicated work.
@@ -999,31 +1039,7 @@ again:
                return 0;
        }
 
-       if (hard_reset) {
-               device_late_fini(hdev);
-
-               /*
-                * Now that the heartbeat thread is closed, flush processes
-                * which are sending messages to CPU
-                */
-               mutex_lock(&hdev->send_cpu_message_lock);
-               mutex_unlock(&hdev->send_cpu_message_lock);
-       }
-
-       /*
-        * Halt the engines and disable interrupts so we won't get any more
-        * completions from H/W and we won't have any accesses from the
-        * H/W to the host machine
-        */
-       hdev->asic_funcs->halt_engines(hdev, hard_reset);
-
-       /* Go over all the queues, release all CS and their jobs */
-       hl_cs_rollback_all(hdev);
-
-       /* Release all pending user interrupts, each pending user interrupt
-        * holds a reference to user context
-        */
-       hl_release_pending_user_interrupts(hdev);
+       cleanup_resources(hdev, hard_reset, fw_reset);
 
 kill_processes:
        if (hard_reset) {
@@ -1057,12 +1073,15 @@ kill_processes:
        }
 
        /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, hard_reset);
+       hdev->asic_funcs->hw_fini(hdev, hard_reset, fw_reset);
 
        if (hard_reset) {
+               hdev->fw_loader.linux_loaded = false;
+
                /* Release kernel context */
                if (hdev->kernel_ctx && hl_ctx_put(hdev->kernel_ctx) == 1)
                        hdev->kernel_ctx = NULL;
+
                hl_vm_fini(hdev);
                hl_mmu_fini(hdev);
                hl_eq_reset(hdev, &hdev->event_queue);
@@ -1292,6 +1311,10 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
        if (rc)
                goto user_interrupts_fini;
 
+
+       /* initialize completion structure for multi CS wait */
+       hl_multi_cs_completion_init(hdev);
+
        /*
         * Initialize the H/W queues. Must be done before hw_init, because
         * there the addresses of the kernel queue are being written to the
@@ -1361,6 +1384,8 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
        hdev->compute_ctx = NULL;
 
+       hdev->asic_funcs->state_dump_init(hdev);
+
        hl_debugfs_add_device(hdev);
 
        /* debugfs nodes are created in hl_ctx_init so it must be called after
@@ -1567,31 +1592,13 @@ void hl_device_fini(struct hl_device *hdev)
        /* Mark device as disabled */
        hdev->disabled = true;
 
-       /* Flush anyone that is inside the critical section of enqueue
-        * jobs to the H/W
-        */
-       hdev->asic_funcs->hw_queues_lock(hdev);
-       hdev->asic_funcs->hw_queues_unlock(hdev);
-
-       /* Flush anyone that is inside device open */
-       mutex_lock(&hdev->fpriv_list_lock);
-       mutex_unlock(&hdev->fpriv_list_lock);
+       take_release_locks(hdev);
 
        hdev->hard_reset_pending = true;
 
        hl_hwmon_fini(hdev);
 
-       device_late_fini(hdev);
-
-       /*
-        * Halt the engines and disable interrupts so we won't get any more
-        * completions from H/W and we won't have any accesses from the
-        * H/W to the host machine
-        */
-       hdev->asic_funcs->halt_engines(hdev, true);
-
-       /* Go over all the queues, release all CS and their jobs */
-       hl_cs_rollback_all(hdev);
+       cleanup_resources(hdev, true, false);
 
        /* Kill processes here after CS rollback. This is because the process
         * can't really exit until all its CSs are done, which is what we
@@ -1610,7 +1617,9 @@ void hl_device_fini(struct hl_device *hdev)
        hl_cb_pool_fini(hdev);
 
        /* Reset the H/W. It will be in idle state after this returns */
-       hdev->asic_funcs->hw_fini(hdev, true);
+       hdev->asic_funcs->hw_fini(hdev, true, false);
+
+       hdev->fw_loader.linux_loaded = false;
 
        /* Release kernel context */
        if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
index 2e4d04e..8d2568c 100644 (file)
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 /*
- * Copyright 2016-2019 HabanaLabs, Ltd.
+ * Copyright 2016-2021 HabanaLabs, Ltd.
  * All Rights Reserved.
  */
 
@@ -240,11 +240,15 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
        /* set fence to a non valid value */
        pkt->fence = cpu_to_le32(UINT_MAX);
 
-       rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
-       if (rc) {
-               dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
-               goto out;
-       }
+       /*
+        * The CPU queue is a synchronous queue with an effective depth of
+        * a single entry (although it is allocated with room for multiple
+        * entries). We lock on it using 'send_cpu_message_lock' which
+        * serializes accesses to the CPU queue.
+        * This means we don't need to lock the entire H/W queues module
+        * when submitting a job to the CPU queue.
+        */
+       hl_hw_queue_submit_bd(hdev, queue, 0, len, pkt_dma_addr);
 
        if (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
                expected_ack_val = queue->pi;
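
The comment above explains why hl_hw_queue_submit_bd() can be called directly here: send_cpu_message_lock already serializes callers, so the synchronous CPU queue behaves like a depth-1 mailbox and the global H/W queues lock is unnecessary. A rough sketch of that serialization pattern, with hypothetical names:

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>

/* one in-flight message at a time towards a synchronous queue */
static int example_send_cpu_message(struct mutex *send_lock,
                                    struct completion *ack,
                                    int (*submit_bd)(void *msg), void *msg)
{
        int rc;

        mutex_lock(send_lock);

        rc = submit_bd(msg);
        if (!rc && !wait_for_completion_timeout(ack, msecs_to_jiffies(100)))
                rc = -ETIMEDOUT;        /* device never acked the message */

        mutex_unlock(send_lock);

        return rc;
}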
@@ -663,17 +667,15 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
        hdev->event_queue.check_eqe_index = false;
 
        /* Read FW application security bits again */
-       if (hdev->asic_prop.fw_cpu_boot_dev_sts0_valid) {
-               hdev->asic_prop.fw_app_cpu_boot_dev_sts0 =
-                                               RREG32(sts_boot_dev_sts0_reg);
-               if (hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
+       if (prop->fw_cpu_boot_dev_sts0_valid) {
+               prop->fw_app_cpu_boot_dev_sts0 = RREG32(sts_boot_dev_sts0_reg);
+               if (prop->fw_app_cpu_boot_dev_sts0 &
                                CPU_BOOT_DEV_STS0_EQ_INDEX_EN)
                        hdev->event_queue.check_eqe_index = true;
        }
 
-       if (hdev->asic_prop.fw_cpu_boot_dev_sts1_valid)
-               hdev->asic_prop.fw_app_cpu_boot_dev_sts1 =
-                                               RREG32(sts_boot_dev_sts1_reg);
+       if (prop->fw_cpu_boot_dev_sts1_valid)
+               prop->fw_app_cpu_boot_dev_sts1 = RREG32(sts_boot_dev_sts1_reg);
 
 out:
        hdev->asic_funcs->cpu_accessible_dma_pool_free(hdev,
@@ -1008,6 +1010,11 @@ void hl_fw_ask_halt_machine_without_linux(struct hl_device *hdev)
        } else {
                WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_GOTO_WFE);
                msleep(static_loader->cpu_reset_wait_msec);
+
+               /* Must clear this register in order to prevent preboot
+                * from reading WFE after reboot
+                */
+               WREG32(static_loader->kmd_msg_to_cpu_reg, KMD_MSG_NA);
        }
 
        hdev->device_cpu_is_halted = true;
@@ -1055,6 +1062,10 @@ static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
                dev_err(hdev->dev,
                        "Device boot progress - Thermal Sensor initialization failed\n");
                break;
+       case CPU_BOOT_STATUS_SECURITY_READY:
+               dev_err(hdev->dev,
+                       "Device boot progress - Stuck in preboot after security initialization\n");
+               break;
        default:
                dev_err(hdev->dev,
                        "Device boot progress - Invalid status code %d\n",
@@ -1238,11 +1249,6 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
         *               b. Check whether hard reset is done by boot cpu
         * 3. FW application - a. Fetch fw application security status
         *                     b. Check whether hard reset is done by fw app
-        *
-        * Preboot:
-        * Check security status bit (CPU_BOOT_DEV_STS0_ENABLED). If set, then-
-        * check security enabled bit (CPU_BOOT_DEV_STS0_SECURITY_EN)
-        * If set, then mark GIC controller to be disabled.
         */
        prop->hard_reset_done_by_fw =
                !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
@@ -1953,8 +1959,8 @@ static void hl_fw_dynamic_update_linux_interrupt_if(struct hl_device *hdev)
        if (!hdev->asic_prop.gic_interrupts_enable &&
                        !(hdev->asic_prop.fw_app_cpu_boot_dev_sts0 &
                                CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN)) {
-               dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_irq_ctrl;
-               dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_irq_ctrl;
+               dyn_regs->gic_host_halt_irq = dyn_regs->gic_host_pi_upd_irq;
+               dyn_regs->gic_host_ints_irq = dyn_regs->gic_host_pi_upd_irq;
 
                dev_warn(hdev->dev,
                        "Using a single interrupt interface towards cpucp");
@@ -2122,8 +2128,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
 
        /* Read FW application security bits */
        if (prop->fw_cpu_boot_dev_sts0_valid) {
-               prop->fw_app_cpu_boot_dev_sts0 =
-                               RREG32(cpu_boot_dev_sts0_reg);
+               prop->fw_app_cpu_boot_dev_sts0 = RREG32(cpu_boot_dev_sts0_reg);
 
                if (prop->fw_app_cpu_boot_dev_sts0 &
                                CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
@@ -2143,8 +2148,7 @@ static void hl_fw_linux_update_state(struct hl_device *hdev,
        }
 
        if (prop->fw_cpu_boot_dev_sts1_valid) {
-               prop->fw_app_cpu_boot_dev_sts1 =
-                               RREG32(cpu_boot_dev_sts1_reg);
+               prop->fw_app_cpu_boot_dev_sts1 = RREG32(cpu_boot_dev_sts1_reg);
 
                dev_dbg(hdev->dev,
                        "Firmware application CPU status1 %#x\n",
@@ -2235,6 +2239,10 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
        dev_info(hdev->dev,
                "Loading firmware to device, may take some time...\n");
 
+       /*
+        * In this stage, "cpu_dyn_regs" contains only LKD's hard coded values!
+        * It will be updated from FW after hl_fw_dynamic_request_descriptor().
+        */
        dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
 
        rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
index 6b3cdd7..bebebcb 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/scatterlist.h>
 #include <linux/hashtable.h>
 #include <linux/debugfs.h>
+#include <linux/rwsem.h>
 #include <linux/bitfield.h>
 #include <linux/genalloc.h>
 #include <linux/sched/signal.h>
 
 #define HL_COMMON_USER_INTERRUPT_ID    0xFFF
 
+#define HL_STATE_DUMP_HIST_LEN         5
+
+#define OBJ_NAMES_HASH_TABLE_BITS      7 /* 1 << 7 buckets */
+#define SYNC_TO_ENGINE_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
+
 /* Memory */
 #define MEM_HASH_TABLE_BITS            7 /* 1 << 7 buckets */
 
@@ -122,12 +128,17 @@ enum hl_mmu_page_table_location {
  *
  * - HL_RESET_DEVICE_RELEASE
  *       Set if reset is due to device release
+ *
+ * - HL_RESET_FW
+ *       F/W will perform the reset. No need to ask it to reset the device. This is relevant
+ *       only when running with secured f/w
  */
 #define HL_RESET_HARD                  (1 << 0)
 #define HL_RESET_FROM_RESET_THREAD     (1 << 1)
 #define HL_RESET_HEARTBEAT             (1 << 2)
 #define HL_RESET_TDR                   (1 << 3)
 #define HL_RESET_DEVICE_RELEASE                (1 << 4)
+#define HL_RESET_FW                    (1 << 5)
 
 #define HL_MAX_SOBS_PER_MONITOR        8
 
@@ -236,7 +247,9 @@ enum hl_cs_type {
        CS_TYPE_DEFAULT,
        CS_TYPE_SIGNAL,
        CS_TYPE_WAIT,
-       CS_TYPE_COLLECTIVE_WAIT
+       CS_TYPE_COLLECTIVE_WAIT,
+       CS_RESERVE_SIGNALS,
+       CS_UNRESERVE_SIGNALS
 };
 
 /*
@@ -281,13 +294,17 @@ enum queue_cb_alloc_flags {
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB. The SOB will reset once the refcount is zero.
  * @sob_id: id of this SOB.
+ * @sob_addr: the sob offset from the base address.
  * @q_idx: the H/W queue that uses this SOB.
+ * @need_reset: reset indication set when switching to the other sob.
  */
 struct hl_hw_sob {
        struct hl_device        *hdev;
        struct kref             kref;
        u32                     sob_id;
+       u32                     sob_addr;
        u32                     q_idx;
+       bool                    need_reset;
 };
 
 enum hl_collective_mode {
@@ -317,11 +334,11 @@ struct hw_queue_properties {
 };
 
 /**
- * enum vm_type_t - virtual memory mapping request information.
+ * enum vm_type - virtual memory mapping request information.
  * @VM_TYPE_USERPTR: mapping of user memory to device virtual address.
  * @VM_TYPE_PHYS_PACK: mapping of DRAM memory to device virtual address.
  */
-enum vm_type_t {
+enum vm_type {
        VM_TYPE_USERPTR = 0x1,
        VM_TYPE_PHYS_PACK = 0x2
 };
@@ -381,6 +398,16 @@ struct hl_mmu_properties {
        u8      host_resident;
 };
 
+/**
+ * struct hl_hints_range - hint addresses reserved va range.
+ * @start_addr: start address of the va range.
+ * @end_addr: end address of the va range.
+ */
+struct hl_hints_range {
+       u64 start_addr;
+       u64 end_addr;
+};
+
 /**
  * struct asic_fixed_properties - ASIC specific immutable properties.
  * @hw_queues_props: H/W queues properties.
@@ -392,6 +419,10 @@ struct hl_mmu_properties {
  * @pmmu: PCI (host) MMU address translation properties.
  * @pmmu_huge: PCI (host) MMU address translation properties for memory
  *              allocated with huge pages.
+ * @hints_dram_reserved_va_range: dram hint addresses reserved range.
+ * @hints_host_reserved_va_range: host hint addresses reserved range.
+ * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved
+ *                                      range.
  * @sram_base_address: SRAM physical start address.
  * @sram_end_address: SRAM physical end address.
  * @sram_user_base_address - SRAM physical start address for user access.
@@ -412,6 +443,10 @@ struct hl_mmu_properties {
  *                    to the device's MMU.
  * @cb_va_end_addr: virtual end address of command buffers which are mapped to
  *                  the device's MMU.
+ * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
+ *                  for hints validity check.
+ * @device_dma_offset_for_host_access: the offset to add to host DMA addresses
+ *                                    to enable the device to access them.
  * @mmu_pgt_size: MMU page tables total size.
  * @mmu_pte_size: PTE size in MMU page tables.
  * @mmu_hop_table_size: MMU hop table size.
@@ -459,6 +494,8 @@ struct hl_mmu_properties {
  *                                       reserved for the user
  * @first_available_cq: first available CQ for the user.
  * @user_interrupt_count: number of user interrupts.
+ * @server_type: Server type that the ASIC is currently installed in.
+ *               The value is according to enum hl_server_type in uapi file.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_enabled: true if security measures are enabled in firmware,
@@ -470,6 +507,7 @@ struct hl_mmu_properties {
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
  * @num_functional_hbms: number of functional HBMs in each DCORE.
+ * @hints_range_reservation: device supports hint address range reservation.
  * @iatu_done_by_fw: true if iATU configuration is being done by FW.
  * @dynamic_fw_load: is dynamic FW load is supported.
  * @gic_interrupts_enable: true if FW is not blocking GIC controller,
@@ -483,6 +521,9 @@ struct asic_fixed_properties {
        struct hl_mmu_properties        dmmu;
        struct hl_mmu_properties        pmmu;
        struct hl_mmu_properties        pmmu_huge;
+       struct hl_hints_range           hints_dram_reserved_va_range;
+       struct hl_hints_range           hints_host_reserved_va_range;
+       struct hl_hints_range           hints_host_hpage_reserved_va_range;
        u64                             sram_base_address;
        u64                             sram_end_address;
        u64                             sram_user_base_address;
@@ -500,6 +541,8 @@ struct asic_fixed_properties {
        u64                             mmu_dram_default_page_addr;
        u64                             cb_va_start_addr;
        u64                             cb_va_end_addr;
+       u64                             dram_hints_align_mask;
+       u64                             device_dma_offset_for_host_access;
        u32                             mmu_pgt_size;
        u32                             mmu_pte_size;
        u32                             mmu_hop_table_size;
@@ -534,6 +577,7 @@ struct asic_fixed_properties {
        u16                             first_available_user_msix_interrupt;
        u16                             first_available_cq[HL_MAX_DCORES];
        u16                             user_interrupt_count;
+       u16                             server_type;
        u8                              tpc_enabled_mask;
        u8                              completion_queues_count;
        u8                              fw_security_enabled;
@@ -542,6 +586,7 @@ struct asic_fixed_properties {
        u8                              dram_supports_virtual_memory;
        u8                              hard_reset_done_by_fw;
        u8                              num_functional_hbms;
+       u8                              hints_range_reservation;
        u8                              iatu_done_by_fw;
        u8                              dynamic_fw_load;
        u8                              gic_interrupts_enable;
@@ -552,40 +597,45 @@ struct asic_fixed_properties {
  * @completion: fence is implemented using completion
  * @refcount: refcount for this fence
  * @cs_sequence: sequence of the corresponding command submission
+ * @stream_master_qid_map: bitmap of stream master QIDs that the multi-CS
+ *                         wait is waiting on
  * @error: mark this fence with error
  * @timestamp: timestamp upon completion
- *
  */
 struct hl_fence {
        struct completion       completion;
        struct kref             refcount;
        u64                     cs_sequence;
+       u32                     stream_master_qid_map;
        int                     error;
        ktime_t                 timestamp;
 };
 
 /**
  * struct hl_cs_compl - command submission completion object.
- * @sob_reset_work: workqueue object to run SOB reset flow.
  * @base_fence: hl fence object.
  * @lock: spinlock to protect fence.
  * @hdev: habanalabs device structure.
  * @hw_sob: the H/W SOB used in this signal/wait CS.
+ * @encaps_sig_hdl: encaps signals handler.
  * @cs_seq: command submission sequence number.
  * @type: type of the CS - signal/wait.
  * @sob_val: the SOB value that is used in this signal/wait CS.
  * @sob_group: the SOB group that is used in this collective wait CS.
+ * @encaps_signals: indication whether this is a completion object of a CS
+ * with encapsulated signals.
  */
 struct hl_cs_compl {
-       struct work_struct      sob_reset_work;
        struct hl_fence         base_fence;
        spinlock_t              lock;
        struct hl_device        *hdev;
        struct hl_hw_sob        *hw_sob;
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
        u64                     cs_seq;
        enum hl_cs_type         type;
        u16                     sob_val;
        u16                     sob_group;
+       bool                    encaps_signals;
 };
 
 /*
@@ -697,6 +747,17 @@ struct hl_sync_stream_properties {
        u8              curr_sob_offset;
 };
 
+/**
+ * struct hl_encaps_signals_mgr - describes sync stream encapsulated signals
+ * handlers manager
+ * @lock: protects handles.
+ * @handles: an idr to hold all encapsulated signals handles.
+ */
+struct hl_encaps_signals_mgr {
+       spinlock_t              lock;
+       struct idr              handles;
+};
+
 /**
  * struct hl_hw_queue - describes a H/W transport queue.
  * @shadow_queue: pointer to a shadow queue that holds pointers to jobs.
@@ -875,7 +936,7 @@ struct pci_mem_region {
        u64 region_base;
        u64 region_size;
        u64 bar_size;
-       u32 offset_in_bar;
+       u64 offset_in_bar;
        u8 bar_id;
        u8 used;
 };
@@ -996,7 +1057,7 @@ struct fw_load_mgr {
  *                hw_fini and before CS rollback.
  * @suspend: handles IP specific H/W or SW changes for suspend.
  * @resume: handles IP specific H/W or SW changes for resume.
- * @cb_mmap: maps a CB.
+ * @mmap: maps memory.
  * @ring_doorbell: increment PI on a given QMAN.
  * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific
  *             function because the PQs are located in different memory areas
@@ -1101,6 +1162,10 @@ struct fw_load_mgr {
  *                         generic f/w compatible PLL Indexes
  * @init_firmware_loader: initialize data for FW loader.
  * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling
+ * @state_dump_init: initialize constants required for state dump
+ * @get_sob_addr: get SOB base address offset.
+ * @set_pci_memory_regions: set properties of PCI memory regions
+ * @get_stream_master_qid_arr: get pointer to stream masters QID array
  */
 struct hl_asic_funcs {
        int (*early_init)(struct hl_device *hdev);
@@ -1110,11 +1175,11 @@ struct hl_asic_funcs {
        int (*sw_init)(struct hl_device *hdev);
        int (*sw_fini)(struct hl_device *hdev);
        int (*hw_init)(struct hl_device *hdev);
-       void (*hw_fini)(struct hl_device *hdev, bool hard_reset);
-       void (*halt_engines)(struct hl_device *hdev, bool hard_reset);
+       void (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
+       void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset);
        int (*suspend)(struct hl_device *hdev);
        int (*resume)(struct hl_device *hdev);
-       int (*cb_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+       int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
                        void *cpu_addr, dma_addr_t dma_addr, size_t size);
        void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi);
        void (*pqe_write)(struct hl_device *hdev, __le64 *pqe,
@@ -1210,10 +1275,11 @@ struct hl_asic_funcs {
        void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
        void (*set_dma_mask_from_fw)(struct hl_device *hdev);
        u64 (*get_device_time)(struct hl_device *hdev);
-       void (*collective_wait_init_cs)(struct hl_cs *cs);
+       int (*collective_wait_init_cs)(struct hl_cs *cs);
        int (*collective_wait_create_jobs)(struct hl_device *hdev,
-                       struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-                       u32 collective_engine_id);
+                       struct hl_ctx *ctx, struct hl_cs *cs,
+                       u32 wait_queue_id, u32 collective_engine_id,
+                       u32 encaps_signal_offset);
        u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
        u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
        void (*ack_protection_bits_errors)(struct hl_device *hdev);
@@ -1226,6 +1292,10 @@ struct hl_asic_funcs {
        int (*map_pll_idx_to_fw_idx)(u32 pll_idx);
        void (*init_firmware_loader)(struct hl_device *hdev);
        void (*init_cpu_scrambler_dram)(struct hl_device *hdev);
+       void (*state_dump_init)(struct hl_device *hdev);
+       u32 (*get_sob_addr)(struct hl_device *hdev, u32 sob_id);
+       void (*set_pci_memory_regions)(struct hl_device *hdev);
+       u32* (*get_stream_master_qid_arr)(void);
 };
 
 
@@ -1282,20 +1352,6 @@ struct hl_cs_counters_atomic {
        atomic64_t validation_drop_cnt;
 };
 
-/**
- * struct hl_pending_cb - pending command buffer structure
- * @cb_node: cb node in pending cb list
- * @cb: command buffer to send in next submission
- * @cb_size: command buffer size
- * @hw_queue_id: destination queue id
- */
-struct hl_pending_cb {
-       struct list_head        cb_node;
-       struct hl_cb            *cb;
-       u32                     cb_size;
-       u32                     hw_queue_id;
-};
-
 /**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
@@ -1312,28 +1368,21 @@ struct hl_pending_cb {
  *            MMU hash or walking the PGT requires talking this lock.
  * @hw_block_list_lock: protects the HW block memory list.
  * @debugfs_list: node in debugfs list of contexts.
- * pending_cb_list: list of pending command buffers waiting to be sent upon
- *                  next user command submission context.
  * @hw_block_mem_list: list of HW block virtual mapped addresses.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
+ * @sig_mgr: encaps signals handle manager.
  * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
  *                     to user so user could inquire about CS. It is used as
  *                     index to cs_pending array.
  * @dram_default_hops: array that holds all hops addresses needed for default
  *                     DRAM mapping.
- * @pending_cb_lock: spinlock to protect pending cb list
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_ctx_switch_token: token to prevent multiple threads of the same
  *                             context from running the context switch phase.
  *                             Only a single thread should run it.
- * @thread_pending_cb_token: token to prevent multiple threads from processing
- *                             the pending CB list. Only a single thread should
- *                             process the list since it is protected by a
- *                             spinlock and we don't want to halt the entire
- *                             command submission sequence.
  * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
  *                             the context switch phase from moving to their
  *                             execution phase before the context switch phase
@@ -1353,17 +1402,15 @@ struct hl_ctx {
        struct mutex                    mmu_lock;
        struct mutex                    hw_block_list_lock;
        struct list_head                debugfs_list;
-       struct list_head                pending_cb_list;
        struct list_head                hw_block_mem_list;
        struct hl_cs_counters_atomic    cs_counters;
        struct gen_pool                 *cb_va_pool;
+       struct hl_encaps_signals_mgr    sig_mgr;
        u64                             cs_sequence;
        u64                             *dram_default_hops;
-       spinlock_t                      pending_cb_lock;
        spinlock_t                      cs_lock;
        atomic64_t                      dram_phys_mem;
        atomic_t                        thread_ctx_switch_token;
-       atomic_t                        thread_pending_cb_token;
        u32                             thread_ctx_switch_wait_token;
        u32                             asid;
        u32                             handle;
@@ -1394,20 +1441,22 @@ struct hl_ctx_mgr {
  * @sgt: pointer to the scatter-gather table that holds the pages.
  * @dir: for DMA unmapping, the direction must be supplied, so save it.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @pid: the pid of the user process owning the memory
  * @addr: user-space virtual address of the start of the memory area.
  * @size: size of the memory area to pin & map.
  * @dma_mapped: true if the SG was mapped to DMA addresses, false otherwise.
  */
 struct hl_userptr {
-       enum vm_type_t          vm_type; /* must be first */
+       enum vm_type            vm_type; /* must be first */
        struct list_head        job_node;
        struct page             **pages;
        unsigned int            npages;
        struct sg_table         *sgt;
        enum dma_data_direction dir;
        struct list_head        debugfs_list;
+       pid_t                   pid;
        u64                     addr;
-       u32                     size;
+       u64                     size;
        u8                      dma_mapped;
 };
 
@@ -1426,12 +1475,14 @@ struct hl_userptr {
  * @mirror_node : node in device mirror list of command submissions.
  * @staged_cs_node: node in the staged cs list.
  * @debugfs_list: node in debugfs list of command submissions.
+ * @encaps_sig_hdl: holds the encaps signals handle.
  * @sequence: the sequence number of this CS.
  * @staged_sequence: the sequence of the staged submission this CS is part of,
  *                   relevant only if staged_cs is set.
  * @timeout_jiffies: cs timeout in jiffies.
  * @submission_time_jiffies: submission time of the cs
  * @type: CS_TYPE_*.
+ * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
  * @timedout : true if CS was timedout.
@@ -1445,6 +1496,7 @@ struct hl_userptr {
  * @staged_cs: true if this CS is part of a staged submission.
  * @skip_reset_on_timeout: true if we shall not reset the device in case
  *                         timeout occurs (debug scenario).
+ * @encaps_signals: true if this CS has encaps reserved signals.
  */
 struct hl_cs {
        u16                     *jobs_in_queue_cnt;
@@ -1459,11 +1511,13 @@ struct hl_cs {
        struct list_head        mirror_node;
        struct list_head        staged_cs_node;
        struct list_head        debugfs_list;
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
        u64                     sequence;
        u64                     staged_sequence;
        u64                     timeout_jiffies;
        u64                     submission_time_jiffies;
        enum hl_cs_type         type;
+       u32                     encaps_sig_hdl_id;
        u8                      submitted;
        u8                      completed;
        u8                      timedout;
@@ -1474,6 +1528,7 @@ struct hl_cs {
        u8                      staged_first;
        u8                      staged_cs;
        u8                      skip_reset_on_timeout;
+       u8                      encaps_signals;
 };
 
 /**
@@ -1493,6 +1548,8 @@ struct hl_cs {
  * @hw_queue_id: the id of the H/W queue this job is submitted to.
  * @user_cb_size: the actual size of the CB we got from the user.
  * @job_cb_size: the actual size of the CB that we put on the queue.
+ * @encaps_sig_wait_offset: encapsulated signals offset, which allows the user
+ *                          to wait on part of the reserved signals.
  * @is_kernel_allocated_cb: true if the CB handle we got from the user holds a
  *                          handle to a kernel-allocated CB object, false
  *                          otherwise (SRAM/DRAM/host address).
@@ -1517,6 +1574,7 @@ struct hl_cs_job {
        u32                     hw_queue_id;
        u32                     user_cb_size;
        u32                     job_cb_size;
+       u32                     encaps_sig_wait_offset;
        u8                      is_kernel_allocated_cb;
        u8                      contains_dma_pkt;
 };
@@ -1613,7 +1671,7 @@ struct hl_vm_hw_block_list_node {
  * @created_from_userptr: is product of host virtual address.
  */
 struct hl_vm_phys_pg_pack {
-       enum vm_type_t          vm_type; /* must be first */
+       enum vm_type            vm_type; /* must be first */
        u64                     *pages;
        u64                     npages;
        u64                     total_size;
@@ -1759,9 +1817,13 @@ struct hl_debugfs_entry {
  * @ctx_mem_hash_list: list of available contexts with MMU mappings.
  * @ctx_mem_hash_spinlock: protects cb_list.
  * @blob_desc: descriptor of blob
+ * @state_dump: data of the system states in case of a bad cs.
+ * @state_dump_sem: protects state_dump.
  * @addr: next address to read/write from/to in read/write32.
  * @mmu_addr: next virtual address to translate to physical address in mmu_show.
+ * @userptr_lookup: the target user ptr to look up on demand.
  * @mmu_asid: ASID to use while translating in mmu_show.
+ * @state_dump_head: index of the latest state dump
  * @i2c_bus: generic u8 debugfs file for bus value to use in i2c_data_read.
  * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read.
  * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read.
@@ -1783,14 +1845,149 @@ struct hl_dbg_device_entry {
        struct list_head                ctx_mem_hash_list;
        spinlock_t                      ctx_mem_hash_spinlock;
        struct debugfs_blob_wrapper     blob_desc;
+       char                            *state_dump[HL_STATE_DUMP_HIST_LEN];
+       struct rw_semaphore             state_dump_sem;
        u64                             addr;
        u64                             mmu_addr;
+       u64                             userptr_lookup;
        u32                             mmu_asid;
+       u32                             state_dump_head;
        u8                              i2c_bus;
        u8                              i2c_addr;
        u8                              i2c_reg;
 };
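
The new state_dump fields form a small history ring: state_dump holds up to HL_STATE_DUMP_HIST_LEN captured dumps, state_dump_head tracks the latest entry and state_dump_sem serializes readers and writers. As a rough sketch only (the slot-advance order and the helper name are assumptions, not the driver's actual code), storing a new dump could look like:

static void store_state_dump(struct hl_dbg_device_entry *entry, char *dump)
{
	down_write(&entry->state_dump_sem);
	/* assume new dumps overwrite the oldest slot */
	entry->state_dump_head =
		(entry->state_dump_head + 1) % HL_STATE_DUMP_HIST_LEN;
	vfree(entry->state_dump[entry->state_dump_head]);	/* drop old dump */
	entry->state_dump[entry->state_dump_head] = dump;	/* vmalloc'ed */
	up_write(&entry->state_dump_sem);
}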
 
+/**
+ * struct hl_hw_obj_name_entry - single hw object name, member of
+ * hl_state_dump_specs
+ * @node: link to the containing hash table
+ * @name: hw object name
+ * @id: object identifier
+ */
+struct hl_hw_obj_name_entry {
+       struct hlist_node       node;
+       const char              *name;
+       u32                     id;
+};
+
+enum hl_state_dump_specs_props {
+       SP_SYNC_OBJ_BASE_ADDR,
+       SP_NEXT_SYNC_OBJ_ADDR,
+       SP_SYNC_OBJ_AMOUNT,
+       SP_MON_OBJ_WR_ADDR_LOW,
+       SP_MON_OBJ_WR_ADDR_HIGH,
+       SP_MON_OBJ_WR_DATA,
+       SP_MON_OBJ_ARM_DATA,
+       SP_MON_OBJ_STATUS,
+       SP_MONITORS_AMOUNT,
+       SP_TPC0_CMDQ,
+       SP_TPC0_CFG_SO,
+       SP_NEXT_TPC,
+       SP_MME_CMDQ,
+       SP_MME_CFG_SO,
+       SP_NEXT_MME,
+       SP_DMA_CMDQ,
+       SP_DMA_CFG_SO,
+       SP_DMA_QUEUES_OFFSET,
+       SP_NUM_OF_MME_ENGINES,
+       SP_SUB_MME_ENG_NUM,
+       SP_NUM_OF_DMA_ENGINES,
+       SP_NUM_OF_TPC_ENGINES,
+       SP_ENGINE_NUM_OF_QUEUES,
+       SP_ENGINE_NUM_OF_STREAMS,
+       SP_ENGINE_NUM_OF_FENCES,
+       SP_FENCE0_CNT_OFFSET,
+       SP_FENCE0_RDATA_OFFSET,
+       SP_CP_STS_OFFSET,
+       SP_NUM_CORES,
+
+       SP_MAX
+};
+
+enum hl_sync_engine_type {
+       ENGINE_TPC,
+       ENGINE_DMA,
+       ENGINE_MME,
+};
+
+/**
+ * struct hl_mon_state_dump - represents a state dump of a single monitor
+ * @id: monitor id
+ * @wr_addr_low: address monitor will write to, low bits
+ * @wr_addr_high: address monitor will write to, high bits
+ * @wr_data: data monitor will write
+ * @arm_data: register value containing monitor configuration
+ * @status: monitor status
+ */
+struct hl_mon_state_dump {
+       u32             id;
+       u32             wr_addr_low;
+       u32             wr_addr_high;
+       u32             wr_data;
+       u32             arm_data;
+       u32             status;
+};
+
+/**
+ * struct hl_sync_to_engine_map_entry - sync object id to engine mapping entry
+ * @engine_type: type of the engine
+ * @engine_id: id of the engine
+ * @sync_id: id of the sync object
+ */
+struct hl_sync_to_engine_map_entry {
+       struct hlist_node               node;
+       enum hl_sync_engine_type        engine_type;
+       u32                             engine_id;
+       u32                             sync_id;
+};
+
+/**
+ * struct hl_sync_to_engine_map - maps sync object id to associated engine id
+ * @tb: hash table containing the mapping, each element is of type
+ *      struct hl_sync_to_engine_map_entry
+ */
+struct hl_sync_to_engine_map {
+       DECLARE_HASHTABLE(tb, SYNC_TO_ENGINE_HASH_TABLE_BITS);
+};
+
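
Since hl_sync_to_engine_map is a plain kernel hashtable, populating it (as gen_sync_to_engine_map() below is documented to do) amounts to allocating entries and hashing them by some key; a minimal sketch, assuming the sync object id is the key:

/* Illustrative only - the key choice and helper name are assumptions */
static int map_add_entry(struct hl_sync_to_engine_map *map,
			 enum hl_sync_engine_type type, u32 engine_id,
			 u32 sync_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->engine_type = type;
	entry->engine_id = engine_id;
	entry->sync_id = sync_id;
	hash_add(map->tb, &entry->node, sync_id);

	return 0;
}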
+/**
+ * struct hl_state_dump_specs_funcs - virtual functions used by the state dump
+ * @gen_sync_to_engine_map: generate a hash map from sync obj id to its engine
+ * @print_single_monitor: format monitor data as string
+ * @monitor_valid: return true if given monitor dump is valid
+ * @print_fences_single_engine: format fences data as string
+ */
+struct hl_state_dump_specs_funcs {
+       int (*gen_sync_to_engine_map)(struct hl_device *hdev,
+                               struct hl_sync_to_engine_map *map);
+       int (*print_single_monitor)(char **buf, size_t *size, size_t *offset,
+                                   struct hl_device *hdev,
+                                   struct hl_mon_state_dump *mon);
+       int (*monitor_valid)(struct hl_mon_state_dump *mon);
+       int (*print_fences_single_engine)(struct hl_device *hdev,
+                                       u64 base_offset,
+                                       u64 status_base_offset,
+                                       enum hl_sync_engine_type engine_type,
+                                       u32 engine_id, char **buf,
+                                       size_t *size, size_t *offset);
+};
+
+/**
+ * struct hl_state_dump_specs - defines ASIC known hw objects names
+ * @so_id_to_str_tb: sync objects names index table
+ * @monitor_id_to_str_tb: monitors names index table
+ * @funcs: virtual functions used for state dump
+ * @sync_namager_names: readable names for sync manager if available (ex: N_E)
+ * @props: pointer to a per asic const props array required for state dump
+ */
+struct hl_state_dump_specs {
+       DECLARE_HASHTABLE(so_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+       DECLARE_HASHTABLE(monitor_id_to_str_tb, OBJ_NAMES_HASH_TABLE_BITS);
+       struct hl_state_dump_specs_funcs        funcs;
+       const char * const                      *sync_namager_names;
+       s64                                     *props;
+};
+
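
The two name tables in hl_state_dump_specs are likewise keyed hashtables of hl_hw_obj_name_entry, so a name lookup such as the one behind hl_state_dump_get_sync_name() can be sketched with the generic helpers (assuming the object id is the hash key; this is not the actual implementation):

/* Sketch: resolve a sync object id to its readable name, if registered */
static const char *lookup_sync_name(struct hl_state_dump_specs *specs, u32 id)
{
	struct hl_hw_obj_name_entry *entry;

	hash_for_each_possible(specs->so_id_to_str_tb, entry, node, id)
		if (entry->id == id)
			return entry->name;

	return NULL;
}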
 
 /*
  * DEVICES
@@ -1798,7 +1995,7 @@ struct hl_dbg_device_entry {
 
 #define HL_STR_MAX     32
 
-#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_NEEDS_RESET + 1)
+#define HL_DEV_STS_MAX (HL_DEVICE_STATUS_LAST + 1)
 
 /* Theoretical limit only. A single host can only contain up to 4 or 8 PCIe
  * x16 cards. In extreme cases, there are hosts that can accommodate 16 cards.
@@ -1946,11 +2143,13 @@ struct hwmon_chip_info;
  * @wq: work queue for device reset procedure.
  * @reset_work: reset work to be done.
  * @hdev: habanalabs device structure.
+ * @fw_reset: whether the f/w will do the reset without us sending it a message to do it.
  */
 struct hl_device_reset_work {
        struct workqueue_struct         *wq;
        struct delayed_work             reset_work;
        struct hl_device                *hdev;
+       bool                            fw_reset;
 };
 
 /**
@@ -2064,6 +2263,58 @@ struct hl_mmu_funcs {
                        u64 virt_addr, struct hl_mmu_hop_info *hops);
 };
 
+/**
+ * number of user contexts allowed to call wait_for_multi_cs ioctl in
+ * parallel
+ */
+#define MULTI_CS_MAX_USER_CTX  2
+
+/**
+ * struct multi_cs_completion - multi CS wait completion.
+ * @completion: completion of any of the CS in the list
+ * @lock: spinlock for the completion structure
+ * @timestamp: timestamp for the multi-CS completion
+ * @stream_master_qid_map: bitmap of all stream masters on which the multi-CS
+ *                        is waiting
+ * @used: 1 if in use, otherwise 0
+ */
+struct multi_cs_completion {
+       struct completion       completion;
+       spinlock_t              lock;
+       s64                     timestamp;
+       u32                     stream_master_qid_map;
+       u8                      used;
+};
+
+/**
+ * struct multi_cs_data - internal data for multi CS call
+ * @ctx: pointer to the context structure
+ * @fence_arr: array of fences of all CSs
+ * @seq_arr: array of CS sequence numbers
+ * @timeout_us: timeout in usec for waiting for CS to complete
+ * @timestamp: timestamp of first completed CS
+ * @wait_status: wait for CS status
+ * @completion_bitmap: bitmap of completed CSs (1- completed, otherwise 0)
+ * @stream_master_qid_map: bitmap of all stream master QIDs on which the
+ *                         multi-CS is waiting
+ * @arr_len: fence_arr and seq_arr array length
+ * @gone_cs: indication of gone CS (1- there was gone CS, otherwise 0)
+ * @update_ts: update timestamp. 1- update the timestamp, otherwise 0.
+ */
+struct multi_cs_data {
+       struct hl_ctx   *ctx;
+       struct hl_fence **fence_arr;
+       u64             *seq_arr;
+       s64             timeout_us;
+       s64             timestamp;
+       long            wait_status;
+       u32             completion_bitmap;
+       u32             stream_master_qid_map;
+       u8              arr_len;
+       u8              gone_cs;
+       u8              update_ts;
+};
+
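
completion_bitmap and stream_master_qid_map are per-index bitmaps, so marking which of the tracked CSs already completed is essentially one bit per fence_arr slot. A hedged sketch of that bookkeeping (the helper and the completion test are assumptions based on the fields above):

/* Sketch: record completed CSs, one bit per entry of fence_arr */
static void mcs_collect_completed(struct multi_cs_data *mcs_data)
{
	u32 i;

	for (i = 0; i < mcs_data->arr_len; i++) {
		struct hl_fence *fence = mcs_data->fence_arr[i];

		if (fence && completion_done(&fence->completion))
			mcs_data->completion_bitmap |= BIT(i);
	}
}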
 /**
  * struct hl_device - habanalabs device structure.
  * @pdev: pointer to PCI device, can be NULL in case of simulator device.
@@ -2129,6 +2380,8 @@ struct hl_mmu_funcs {
  * @mmu_func: device-related MMU functions.
  * @fw_loader: FW loader manager.
  * @pci_mem_region: array of memory regions in the PCI
+ * @state_dump_specs: constants and dictionaries needed to dump system state.
+ * @multi_cs_completion: array of multi-CS completion.
  * @dram_used_mem: current DRAM memory consumption.
  * @timeout_jiffies: device CS timeout value.
  * @max_power: the max power of the device, as configured by the sysadmin. This
@@ -2205,6 +2458,7 @@ struct hl_mmu_funcs {
  *                        halted. We can't halt it again because the COMMS
  *                        protocol will throw an error. Relevant only for
  *                        cases where Linux was not loaded to device CPU
+ * @supports_wait_for_multi_cs: true if wait for multi CS is supported
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -2273,6 +2527,11 @@ struct hl_device {
 
        struct pci_mem_region           pci_mem_region[PCI_REGION_NUMBER];
 
+       struct hl_state_dump_specs      state_dump_specs;
+
+       struct multi_cs_completion      multi_cs_completion[
+                                                       MULTI_CS_MAX_USER_CTX];
+       u32                             *stream_master_qid_arr;
        atomic64_t                      dram_used_mem;
        u64                             timeout_jiffies;
        u64                             max_power;
@@ -2322,6 +2581,8 @@ struct hl_device {
        u8                              curr_reset_cause;
        u8                              skip_reset_on_timeout;
        u8                              device_cpu_is_halted;
+       u8                              supports_wait_for_multi_cs;
+       u8                              stream_master_qid_arr_size;
 
        /* Parameters for bring-up */
        u64                             nic_ports_mask;
@@ -2343,6 +2604,29 @@ struct hl_device {
 };
 
 
+/**
+ * struct hl_cs_encaps_sig_handle - encapsulated signals handle structure
+ * @refcount: refcount used to protect removing this id when several
+ *            wait cs are used to wait on the reserved encaps signals.
+ * @hdev: pointer to habanalabs device structure.
+ * @hw_sob: pointer to H/W SOB used in the reservation.
+ * @cs_seq: staged cs sequence which contains encapsulated signals
+ * @id: idr handler id to be used to fetch the handler info
+ * @q_idx: stream queue index
+ * @pre_sob_val: current SOB value before reservation
+ * @count: signals number
+ */
+struct hl_cs_encaps_sig_handle {
+       struct kref refcount;
+       struct hl_device *hdev;
+       struct hl_hw_sob *hw_sob;
+       u64  cs_seq;
+       u32  id;
+       u32  q_idx;
+       u32  pre_sob_val;
+       u32  count;
+};
+
 /*
  * IOCTLs
  */
@@ -2372,6 +2656,23 @@ struct hl_ioctl_desc {
  * Kernel module functions that can be accessed by entire module
  */
 
+/**
+ * hl_get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
+ *
+ * Calculate the number of consecutive pages described by the SG list. Take the
+ * offset of the address in the first page, add to it the length and round it up
+ * to the number of needed pages.
+ */
+static inline u32 hl_get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
+{
+       *dma_addr = sg_dma_address(sg);
+
+       return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
+                       (PAGE_SIZE - 1)) >> PAGE_SHIFT;
+}
+
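
The page count computed by hl_get_sg_info() is simply the segment's byte span, including the in-page offset of its start address, rounded up to whole pages. A stand-alone sketch of the same arithmetic (user-space, fixed 4 KiB pages purely for illustration):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096u
#define PAGE_SHIFT	12

static uint32_t npages_for_segment(uint64_t dma_addr, uint32_t len)
{
	/* in-page offset of the start address + length, rounded up to pages */
	return (uint32_t)(((dma_addr & (PAGE_SIZE - 1)) + len +
			   (PAGE_SIZE - 1)) >> PAGE_SHIFT);
}

int main(void)
{
	/* 0x20 bytes starting at 0x1ff0 straddle two 4 KiB pages */
	printf("%u\n", npages_for_segment(0x1ff0, 0x20));
	return 0;
}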
 /**
  * hl_mem_area_inside_range() - Checks whether address+size are inside a range.
  * @address: The start address of the area we want to validate.
@@ -2436,7 +2737,9 @@ void destroy_hdev(struct hl_device *hdev);
 int hl_hw_queues_create(struct hl_device *hdev);
 void hl_hw_queues_destroy(struct hl_device *hdev);
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
-                               u32 cb_size, u64 cb_ptr);
+               u32 cb_size, u64 cb_ptr);
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+               u32 ctl, u32 len, u64 ptr);
 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
@@ -2470,6 +2773,8 @@ void hl_ctx_do_release(struct kref *ref);
 void hl_ctx_get(struct hl_device *hdev,        struct hl_ctx *ctx);
 int hl_ctx_put(struct hl_ctx *ctx);
 struct hl_fence *hl_ctx_get_fence(struct hl_ctx *ctx, u64 seq);
+int hl_ctx_get_fences(struct hl_ctx *ctx, u64 *seq_arr,
+                               struct hl_fence **fence, u32 arr_len);
 void hl_ctx_mgr_init(struct hl_ctx_mgr *mgr);
 void hl_ctx_mgr_fini(struct hl_device *hdev, struct hl_ctx_mgr *mgr);
 
@@ -2511,18 +2816,19 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
-void hl_pending_cb_list_flush(struct hl_ctx *ctx);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
                enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
+void hl_fences_put(struct hl_fence **fence, int len);
 void hl_fence_get(struct hl_fence *fence);
 void cs_get(struct hl_cs *cs);
 bool cs_needs_completion(struct hl_cs *cs);
 bool cs_needs_timeout(struct hl_cs *cs);
 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
+void hl_multi_cs_completion_init(struct hl_device *hdev);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2650,9 +2956,25 @@ int hl_set_voltage(struct hl_device *hdev,
                        int sensor_index, u32 attr, long value);
 int hl_set_current(struct hl_device *hdev,
                        int sensor_index, u32 attr, long value);
+void hw_sob_get(struct hl_hw_sob *hw_sob);
+void hw_sob_put(struct hl_hw_sob *hw_sob);
+void hl_encaps_handle_do_release(struct kref *ref);
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+                       struct hl_cs *cs, struct hl_cs_job *job,
+                       struct hl_cs_compl *cs_cmpl);
 void hl_release_pending_user_interrupts(struct hl_device *hdev);
 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx,
-                       struct hl_hw_sob **hw_sob, u32 count);
+                       struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig);
+
+int hl_state_dump(struct hl_device *hdev);
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id);
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+                                       struct hl_mon_state_dump *mon);
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map);
+__printf(4, 5) int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+                                       const char *format, ...);
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n);
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type);
 
 #ifdef CONFIG_DEBUG_FS
 
@@ -2673,6 +2995,8 @@ void hl_debugfs_remove_userptr(struct hl_device *hdev,
                                struct hl_userptr *userptr);
 void hl_debugfs_add_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
 void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx);
+void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data,
+                                       unsigned long length);
 
 #else
 
@@ -2746,6 +3070,11 @@ static inline void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev,
 {
 }
 
+static inline void hl_debugfs_set_state_dump(struct hl_device *hdev,
+                                       char *data, unsigned long length)
+{
+}
+
 #endif
 
 /* IOCTLs */
index 4194cda..a75e4fc 100644 (file)
@@ -141,7 +141,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
        hl_cb_mgr_init(&hpriv->cb_mgr);
        hl_ctx_mgr_init(&hpriv->ctx_mgr);
 
-       hpriv->taskpid = find_get_pid(current->pid);
+       hpriv->taskpid = get_task_pid(current, PIDTYPE_PID);
 
        mutex_lock(&hdev->fpriv_list_lock);
 
@@ -194,7 +194,6 @@ int hl_device_open(struct inode *inode, struct file *filp)
 
 out_err:
        mutex_unlock(&hdev->fpriv_list_lock);
-
        hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
        hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
        filp->private_data = NULL;
@@ -318,12 +317,16 @@ int create_hdev(struct hl_device **dev, struct pci_dev *pdev,
                hdev->asic_prop.fw_security_enabled = false;
 
        /* Assign status description string */
-       strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
-                                       "disabled", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_OPERATIONAL],
+                                       "operational", HL_STR_MAX);
        strncpy(hdev->status[HL_DEVICE_STATUS_IN_RESET],
                                        "in reset", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_MALFUNCTION],
+                                       "disabled", HL_STR_MAX);
        strncpy(hdev->status[HL_DEVICE_STATUS_NEEDS_RESET],
                                        "needs reset", HL_STR_MAX);
+       strncpy(hdev->status[HL_DEVICE_STATUS_IN_DEVICE_CREATION],
+                                       "in device creation", HL_STR_MAX);
 
        hdev->major = hl_major;
        hdev->reset_on_lockup = reset_on_lockup;
@@ -532,7 +535,7 @@ hl_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t state)
                result = PCI_ERS_RESULT_NONE;
        }
 
-       hdev->asic_funcs->halt_engines(hdev, true);
+       hdev->asic_funcs->halt_engines(hdev, true, false);
 
        return result;
 }
index f4dda7b..86c3257 100644 (file)
@@ -94,6 +94,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 
        hw_ip.first_available_interrupt_id =
                        prop->first_available_user_msix_interrupt;
+       hw_ip.server_type = prop->server_type;
+
        return copy_to_user(out, &hw_ip,
                min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
index bcabfdb..76b7de8 100644 (file)
@@ -65,7 +65,7 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
 }
 
 /*
- * ext_and_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
+ * hl_hw_queue_submit_bd() - Submit a buffer descriptor to an external or a
  *                                H/W queue.
  * @hdev: pointer to habanalabs device structure
  * @q: pointer to habanalabs queue structure
@@ -80,8 +80,8 @@ void hl_hw_queue_update_ci(struct hl_cs *cs)
  * This function must be called when the scheduler mutex is taken
  *
  */
-static void ext_and_hw_queue_submit_bd(struct hl_device *hdev,
-                       struct hl_hw_queue *q, u32 ctl, u32 len, u64 ptr)
+void hl_hw_queue_submit_bd(struct hl_device *hdev, struct hl_hw_queue *q,
+               u32 ctl, u32 len, u64 ptr)
 {
        struct hl_bd *bd;
 
@@ -222,8 +222,8 @@ static int hw_queue_sanity_checks(struct hl_device *hdev, struct hl_hw_queue *q,
  * @cb_size: size of CB
  * @cb_ptr: pointer to CB location
  *
- * This function sends a single CB, that must NOT generate a completion entry
- *
+ * This function sends a single CB that must NOT generate a completion entry.
+ * Sending CPU messages can be done instead via 'hl_hw_queue_submit_bd()'.
  */
 int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
                                u32 cb_size, u64 cb_ptr)
@@ -231,16 +231,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
        struct hl_hw_queue *q = &hdev->kernel_queues[hw_queue_id];
        int rc = 0;
 
-       /*
-        * The CPU queue is a synchronous queue with an effective depth of
-        * a single entry (although it is allocated with room for multiple
-        * entries). Therefore, there is a different lock, called
-        * send_cpu_message_lock, that serializes accesses to the CPU queue.
-        * As a result, we don't need to lock the access to the entire H/W
-        * queues module when submitting a JOB to the CPU queue
-        */
-       if (q->queue_type != QUEUE_TYPE_CPU)
-               hdev->asic_funcs->hw_queues_lock(hdev);
+       hdev->asic_funcs->hw_queues_lock(hdev);
 
        if (hdev->disabled) {
                rc = -EPERM;
@@ -258,11 +249,10 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
                        goto out;
        }
 
-       ext_and_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
+       hl_hw_queue_submit_bd(hdev, q, 0, cb_size, cb_ptr);
 
 out:
-       if (q->queue_type != QUEUE_TYPE_CPU)
-               hdev->asic_funcs->hw_queues_unlock(hdev);
+       hdev->asic_funcs->hw_queues_unlock(hdev);
 
        return rc;
 }
@@ -328,7 +318,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
        cq->pi = hl_cq_inc_ptr(cq->pi);
 
 submit_bd:
-       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+       hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
 /*
@@ -407,7 +397,7 @@ static void hw_queue_schedule_job(struct hl_cs_job *job)
        else
                ptr = (u64) (uintptr_t) job->user_cb;
 
-       ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
+       hl_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
 static int init_signal_cs(struct hl_device *hdev,
@@ -426,8 +416,9 @@ static int init_signal_cs(struct hl_device *hdev,
        cs_cmpl->sob_val = prop->next_sob_val;
 
        dev_dbg(hdev->dev,
-               "generate signal CB, sob_id: %d, sob val: 0x%x, q_idx: %d\n",
-               cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx);
+               "generate signal CB, sob_id: %d, sob val: %u, q_idx: %d, seq: %llu\n",
+               cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val, q_idx,
+               cs_cmpl->cs_seq);
 
        /* we set an EB since we must make sure all operations are done
         * when sending the signal
@@ -435,17 +426,37 @@ static int init_signal_cs(struct hl_device *hdev,
        hdev->asic_funcs->gen_signal_cb(hdev, job->patched_cb,
                                cs_cmpl->hw_sob->sob_id, 0, true);
 
-       rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1);
+       rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, 1,
+                                                               false);
 
        return rc;
 }
 
-static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
+void hl_hw_queue_encaps_sig_set_sob_info(struct hl_device *hdev,
+                       struct hl_cs *cs, struct hl_cs_job *job,
+                       struct hl_cs_compl *cs_cmpl)
+{
+       struct hl_cs_encaps_sig_handle *handle = cs->encaps_sig_hdl;
+
+       cs_cmpl->hw_sob = handle->hw_sob;
+
+       /* Note that encaps_sig_wait_offset was validated earlier in the flow
+        * against the max reserved signal count.
+        * Always decrement the offset by 1: when the user sets offset 1 they
+        * mean to wait only for the first signal, whose value is pre_sob_val;
+        * offset 2 means the required value is (pre_sob_val + 1), and so on.
+        */
+       cs_cmpl->sob_val = handle->pre_sob_val +
+                       (job->encaps_sig_wait_offset - 1);
+}
+
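
In other words, for a reservation starting right after pre_sob_val, a wait offset of N targets SOB value pre_sob_val + (N - 1). A tiny stand-alone check of that mapping (illustration only):

#include <assert.h>
#include <stdint.h>

static uint32_t encaps_wait_sob_val(uint32_t pre_sob_val, uint32_t offset)
{
	/* offset 1 waits for pre_sob_val, offset 2 for pre_sob_val + 1, ... */
	return pre_sob_val + (offset - 1);
}

int main(void)
{
	assert(encaps_wait_sob_val(100, 1) == 100);
	assert(encaps_wait_sob_val(100, 2) == 101);
	return 0;
}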
+static int init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
                struct hl_cs_job *job, struct hl_cs_compl *cs_cmpl)
 {
-       struct hl_cs_compl *signal_cs_cmpl;
-       struct hl_sync_stream_properties *prop;
        struct hl_gen_wait_properties wait_prop;
+       struct hl_sync_stream_properties *prop;
+       struct hl_cs_compl *signal_cs_cmpl;
        u32 q_idx;
 
        q_idx = job->hw_queue_id;
@@ -455,14 +466,51 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
                                        struct hl_cs_compl,
                                        base_fence);
 
-       /* copy the SOB id and value of the signal CS */
-       cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-       cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       if (cs->encaps_signals) {
+               /* use the encaps signal handle stored earlier in the flow
+                * and set the SOB information from the encaps
+                * signals handle
+                */
+               hl_hw_queue_encaps_sig_set_sob_info(hdev, cs, job, cs_cmpl);
+
+               dev_dbg(hdev->dev, "Wait for encaps signals handle, qidx(%u), CS sequence(%llu), sob val: 0x%x, offset: %u\n",
+                               cs->encaps_sig_hdl->q_idx,
+                               cs->encaps_sig_hdl->cs_seq,
+                               cs_cmpl->sob_val,
+                               job->encaps_sig_wait_offset);
+       } else {
+               /* Copy the SOB id and value of the signal CS */
+               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       }
+
+       /* Check again if the signal CS already completed.
+        * If so, don't send any wait CS since the hw_sob could
+        * already be in reset. If the signal has not completed,
+        * take a refcount on the hw_sob to prevent the sob from
+        * being reset while the wait CS is not yet submitted.
+        * Note that this check is protected by two locks, the
+        * hw queue lock and the completion object lock; the same
+        * completion object lock also protects the hw_sob reset
+        * handler function.
+        * The hw queue lock prevents the hw_sob refcount value,
+        * changed by the signal/wait flows, from going out of sync.
+        */
+       spin_lock(&signal_cs_cmpl->lock);
+
+       if (completion_done(&cs->signal_fence->completion)) {
+               spin_unlock(&signal_cs_cmpl->lock);
+               return -EINVAL;
+       }
+
+       kref_get(&cs_cmpl->hw_sob->kref);
+
+       spin_unlock(&signal_cs_cmpl->lock);
 
        dev_dbg(hdev->dev,
-               "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d\n",
+               "generate wait CB, sob_id: %d, sob_val: 0x%x, mon_id: %d, q_idx: %d, seq: %llu\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
-               prop->base_mon_id, q_idx);
+               prop->base_mon_id, q_idx, cs->sequence);
 
        wait_prop.data = (void *) job->patched_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -471,17 +519,14 @@ static void init_wait_cs(struct hl_device *hdev, struct hl_cs *cs,
        wait_prop.mon_id = prop->base_mon_id;
        wait_prop.q_idx = q_idx;
        wait_prop.size = 0;
+
        hdev->asic_funcs->gen_wait_cb(hdev, &wait_prop);
 
-       kref_get(&cs_cmpl->hw_sob->kref);
-       /*
-        * Must put the signal fence after the SOB refcnt increment so
-        * the SOB refcnt won't turn 0 and reset the SOB before the
-        * wait CS was submitted.
-        */
        mb();
        hl_fence_put(cs->signal_fence);
        cs->signal_fence = NULL;
+
+       return 0;
 }
 
 /*
@@ -506,7 +551,60 @@ static int init_signal_wait_cs(struct hl_cs *cs)
        if (cs->type & CS_TYPE_SIGNAL)
                rc = init_signal_cs(hdev, job, cs_cmpl);
        else if (cs->type & CS_TYPE_WAIT)
-               init_wait_cs(hdev, cs, job, cs_cmpl);
+               rc = init_wait_cs(hdev, cs, job, cs_cmpl);
+
+       return rc;
+}
+
+static int encaps_sig_first_staged_cs_handler
+                       (struct hl_device *hdev, struct hl_cs *cs)
+{
+       struct hl_cs_compl *cs_cmpl =
+                       container_of(cs->fence,
+                                       struct hl_cs_compl, base_fence);
+       struct hl_cs_encaps_sig_handle *encaps_sig_hdl;
+       struct hl_encaps_signals_mgr *mgr;
+       int rc = 0;
+
+       mgr = &hdev->compute_ctx->sig_mgr;
+
+       spin_lock(&mgr->lock);
+       encaps_sig_hdl = idr_find(&mgr->handles, cs->encaps_sig_hdl_id);
+       if (encaps_sig_hdl) {
+               /*
+                * Set the handler's CS sequence, i.e. the CS which
+                * contains the encapsulated signals.
+                */
+               encaps_sig_hdl->cs_seq = cs->sequence;
+               /* Store the handle and set the encaps signal indication,
+                * to be used later in cs_do_release to put the last
+                * reference to the encaps signals handler.
+                */
+               cs_cmpl->encaps_signals = true;
+               cs_cmpl->encaps_sig_hdl = encaps_sig_hdl;
+
+               /* Set the hw_sob pointer in the completion object,
+                * since it is used in the cs_do_release flow to put
+                * a refcount on the sob.
+                */
+               cs_cmpl->hw_sob = encaps_sig_hdl->hw_sob;
+               cs_cmpl->sob_val = encaps_sig_hdl->pre_sob_val +
+                                               encaps_sig_hdl->count;
+
+               dev_dbg(hdev->dev, "CS seq (%llu) added to encaps signal handler id (%u), count(%u), qidx(%u), sob(%u), val(%u)\n",
+                               cs->sequence, encaps_sig_hdl->id,
+                               encaps_sig_hdl->count,
+                               encaps_sig_hdl->q_idx,
+                               cs_cmpl->hw_sob->sob_id,
+                               cs_cmpl->sob_val);
+
+       } else {
+               dev_err(hdev->dev, "encaps handle id(%u) wasn't found!\n",
+                               cs->encaps_sig_hdl_id);
+               rc = -EINVAL;
+       }
+
+       spin_unlock(&mgr->lock);
 
        return rc;
 }
@@ -581,14 +679,21 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
        if ((cs->type == CS_TYPE_SIGNAL) || (cs->type == CS_TYPE_WAIT)) {
                rc = init_signal_wait_cs(cs);
-               if (rc) {
-                       dev_err(hdev->dev, "Failed to submit signal cs\n");
+               if (rc)
                        goto unroll_cq_resv;
-               }
-       } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT)
-               hdev->asic_funcs->collective_wait_init_cs(cs);
+       } else if (cs->type == CS_TYPE_COLLECTIVE_WAIT) {
+               rc = hdev->asic_funcs->collective_wait_init_cs(cs);
+               if (rc)
+                       goto unroll_cq_resv;
+       }
 
 
+       if (cs->encaps_signals && cs->staged_first) {
+               rc = encaps_sig_first_staged_cs_handler(hdev, cs);
+               if (rc)
+                       goto unroll_cq_resv;
+       }
+
        spin_lock(&hdev->cs_mirror_lock);
 
        /* Verify staged CS exists and add to the staged list */
@@ -613,6 +718,11 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
                }
 
                list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+
+               /* update stream map of the first CS */
+               if (hdev->supports_wait_for_multi_cs)
+                       staged_cs->fence->stream_master_qid_map |=
+                                       cs->fence->stream_master_qid_map;
        }
 
        list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
@@ -834,6 +944,8 @@ static void sync_stream_queue_init(struct hl_device *hdev, u32 q_idx)
                hw_sob = &sync_stream_prop->hw_sob[sob];
                hw_sob->hdev = hdev;
                hw_sob->sob_id = sync_stream_prop->base_sob_id + sob;
+               hw_sob->sob_addr =
+                       hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id);
                hw_sob->q_idx = q_idx;
                kref_init(&hw_sob->kref);
        }
index af339ce..3398693 100644 (file)
@@ -124,7 +124,7 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
 
        spin_lock(&vm->idr_lock);
        handle = idr_alloc(&vm->phys_pg_pack_handles, phys_pg_pack, 1, 0,
-                               GFP_KERNEL);
+                               GFP_ATOMIC);
        spin_unlock(&vm->idr_lock);
 
        if (handle < 0) {
@@ -528,6 +528,33 @@ static inline int add_va_block(struct hl_device *hdev,
        return rc;
 }
 
+/**
+ * is_hint_crossing_range() - check if hint address crossing specified reserved
+ * range.
+ */
+static inline bool is_hint_crossing_range(enum hl_va_range_type range_type,
+               u64 start_addr, u32 size, struct asic_fixed_properties *prop) {
+       bool range_cross;
+
+       if (range_type == HL_VA_RANGE_TYPE_DRAM)
+               range_cross =
+                       hl_mem_area_crosses_range(start_addr, size,
+                       prop->hints_dram_reserved_va_range.start_addr,
+                       prop->hints_dram_reserved_va_range.end_addr);
+       else if (range_type == HL_VA_RANGE_TYPE_HOST)
+               range_cross =
+                       hl_mem_area_crosses_range(start_addr,   size,
+                       prop->hints_host_reserved_va_range.start_addr,
+                       prop->hints_host_reserved_va_range.end_addr);
+       else
+               range_cross =
+                       hl_mem_area_crosses_range(start_addr, size,
+                       prop->hints_host_hpage_reserved_va_range.start_addr,
+                       prop->hints_host_hpage_reserved_va_range.end_addr);
+
+       return range_cross;
+}
+
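
hl_mem_area_crosses_range() is used here as an interval-overlap test between the candidate block and the reserved hint range (that interpretation is an assumption); the predicate itself is the standard closed-interval intersection check, sketched stand-alone:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed semantics: [start, start + size - 1] crosses [rstart, rend]
 * iff each interval begins no later than the other one ends.
 */
static bool area_crosses_range(uint64_t start, uint32_t size,
			       uint64_t rstart, uint64_t rend)
{
	uint64_t end = start + size - 1;

	return start <= rend && rstart <= end;
}

int main(void)
{
	printf("%d\n", area_crosses_range(0x1000, 0x2000, 0x2000, 0x2fff)); /* 1 */
	printf("%d\n", area_crosses_range(0x1000, 0x1000, 0x2000, 0x2fff)); /* 0 */
	return 0;
}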
 /**
  * get_va_block() - get a virtual block for the given size and alignment.
  *
@@ -536,6 +563,8 @@ static inline int add_va_block(struct hl_device *hdev,
  * @size: requested block size.
  * @hint_addr: hint for requested address by the user.
  * @va_block_align: required alignment of the virtual block start address.
+ * @range_type: va range type (host, dram)
+ * @flags: additional memory flags, currently only uses HL_MEM_FORCE_HINT
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
@@ -545,13 +574,19 @@ static inline int add_va_block(struct hl_device *hdev,
  */
 static u64 get_va_block(struct hl_device *hdev,
                                struct hl_va_range *va_range,
-                               u64 size, u64 hint_addr, u32 va_block_align)
+                               u64 size, u64 hint_addr, u32 va_block_align,
+                               enum hl_va_range_type range_type,
+                               u32 flags)
 {
        struct hl_vm_va_block *va_block, *new_va_block = NULL;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
        u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
-               align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
+               align_mask, reserved_valid_start = 0, reserved_valid_size = 0,
+               dram_hint_mask = prop->dram_hints_align_mask;
        bool add_prev = false;
        bool is_align_pow_2  = is_power_of_2(va_range->page_size);
+       bool is_hint_dram_addr = hl_is_dram_va(hdev, hint_addr);
+       bool force_hint = flags & HL_MEM_FORCE_HINT;
 
        if (is_align_pow_2)
                align_mask = ~((u64)va_block_align - 1);
@@ -564,12 +599,20 @@ static u64 get_va_block(struct hl_device *hdev,
                size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
                                                        va_range->page_size;
 
-       tmp_hint_addr = hint_addr;
+       tmp_hint_addr = hint_addr & ~dram_hint_mask;
 
        /* Check if we need to ignore hint address */
        if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
-                       (!is_align_pow_2 &&
-                               do_div(tmp_hint_addr, va_range->page_size))) {
+               (!is_align_pow_2 && is_hint_dram_addr &&
+                       do_div(tmp_hint_addr, va_range->page_size))) {
+
+               if (force_hint) {
+                       /* Hint must be respected, so here we just fail */
+                       dev_err(hdev->dev,
+                               "Hint address 0x%llx is not page aligned - cannot be respected\n",
+                               hint_addr);
+                       return 0;
+               }
 
                dev_dbg(hdev->dev,
                        "Hint address 0x%llx will be ignored because it is not aligned\n",
@@ -596,6 +639,16 @@ static u64 get_va_block(struct hl_device *hdev,
                if (valid_size < size)
                        continue;
 
+               /*
+                * In case hint address is 0, and arc_hints_range_reservation
+                * property enabled, then avoid allocating va blocks from the
+                * range reserved for hint addresses
+                */
+               if (prop->hints_range_reservation && !hint_addr)
+                       if (is_hint_crossing_range(range_type, valid_start,
+                                       size, prop))
+                               continue;
+
                /* Pick the minimal length block which has the required size */
                if (!new_va_block || (valid_size < reserved_valid_size)) {
                        new_va_block = va_block;
@@ -618,6 +671,17 @@ static u64 get_va_block(struct hl_device *hdev,
                goto out;
        }
 
+       if (force_hint && reserved_valid_start != hint_addr) {
+               /* Hint address must be respected. If we are here - this means
+                * we could not respect it.
+                */
+               dev_err(hdev->dev,
+                       "Hint address 0x%llx could not be respected\n",
+                       hint_addr);
+               reserved_valid_start = 0;
+               goto out;
+       }
+
        /*
         * Check if there is some leftover range due to reserving the new
         * va block, then return it to the main virtual addresses list.
@@ -670,7 +734,8 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
                enum hl_va_range_type type, u32 size, u32 alignment)
 {
        return get_va_block(hdev, ctx->va_range[type], size, 0,
-                       max(alignment, ctx->va_range[type]->page_size));
+                       max(alignment, ctx->va_range[type]->page_size),
+                       type, 0);
 }
 
 /**
@@ -731,29 +796,16 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
        return rc;
 }
 
-/**
- * get_sg_info() - get number of pages and the DMA address from SG list.
- * @sg: the SG list.
- * @dma_addr: pointer to DMA address to return.
- *
- * Calculate the number of consecutive pages described by the SG list. Take the
- * offset of the address in the first page, add to it the length and round it up
- * to the number of needed pages.
- */
-static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
-{
-       *dma_addr = sg_dma_address(sg);
-
-       return ((((*dma_addr) & (PAGE_SIZE - 1)) + sg_dma_len(sg)) +
-                       (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-}
-
 /**
  * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
  *                                    memory
  * @ctx: pointer to the context structure.
  * @userptr: userptr to initialize from.
  * @pphys_pg_pack: result pointer.
+ * @force_regular_page: tell the function to ignore huge page optimization,
+ *                      even if possible. Needed for cases where the device VA
+ *                      is allocated before we know the composition of the
+ *                      physical pages
  *
  * This function does the following:
  * - Pin the physical pages related to the given virtual block.
@@ -762,17 +814,18 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
                                struct hl_userptr *userptr,
-                               struct hl_vm_phys_pg_pack **pphys_pg_pack)
+                               struct hl_vm_phys_pg_pack **pphys_pg_pack,
+                               bool force_regular_page)
 {
+       u32 npages, page_size = PAGE_SIZE,
+               huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
+       u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
        struct hl_vm_phys_pg_pack *phys_pg_pack;
+       bool first = true, is_huge_page_opt;
+       u64 page_mask, total_npages;
        struct scatterlist *sg;
        dma_addr_t dma_addr;
-       u64 page_mask, total_npages;
-       u32 npages, page_size = PAGE_SIZE,
-               huge_page_size = ctx->hdev->asic_prop.pmmu_huge.page_size;
-       bool first = true, is_huge_page_opt = true;
        int rc, i, j;
-       u32 pgs_in_huge_page = huge_page_size >> __ffs(page_size);
 
        phys_pg_pack = kzalloc(sizeof(*phys_pg_pack), GFP_KERNEL);
        if (!phys_pg_pack)
@@ -783,6 +836,8 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
        phys_pg_pack->asid = ctx->asid;
        atomic_set(&phys_pg_pack->mapping_cnt, 1);
 
+       is_huge_page_opt = (force_regular_page ? false : true);
+
        /* Only if all dma_addrs are aligned to 2MB and their
         * sizes is at least 2MB, we can use huge page mapping.
         * We limit the 2MB optimization to this condition,
@@ -791,7 +846,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
         */
        total_npages = 0;
        for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-               npages = get_sg_info(sg, &dma_addr);
+               npages = hl_get_sg_info(sg, &dma_addr);
 
                total_npages += npages;
 
@@ -820,7 +875,7 @@ static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
 
        j = 0;
        for_each_sg(userptr->sgt->sgl, sg, userptr->sgt->nents, i) {
-               npages = get_sg_info(sg, &dma_addr);
+               npages = hl_get_sg_info(sg, &dma_addr);
 
                /* align down to physical page size and save the offset */
                if (first) {
@@ -1001,11 +1056,12 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        struct hl_userptr *userptr = NULL;
        struct hl_vm_hash_node *hnode;
        struct hl_va_range *va_range;
-       enum vm_type_t *vm_type;
+       enum vm_type *vm_type;
        u64 ret_vaddr, hint_addr;
        u32 handle = 0, va_block_align;
        int rc;
        bool is_userptr = args->flags & HL_MEM_USERPTR;
+       enum hl_va_range_type va_range_type = 0;
 
        /* Assume failure */
        *device_addr = 0;
@@ -1023,7 +1079,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                }
 
                rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-                               &phys_pg_pack);
+                               &phys_pg_pack, false);
                if (rc) {
                        dev_err(hdev->dev,
                                "unable to init page pack for vaddr 0x%llx\n",
@@ -1031,14 +1087,14 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                        goto init_page_pack_err;
                }
 
-               vm_type = (enum vm_type_t *) userptr;
+               vm_type = (enum vm_type *) userptr;
                hint_addr = args->map_host.hint_addr;
                handle = phys_pg_pack->handle;
 
                /* get required alignment */
                if (phys_pg_pack->page_size == page_size) {
                        va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST];
-
+                       va_range_type = HL_VA_RANGE_TYPE_HOST;
                        /*
                         * huge page alignment may be needed in case of regular
                         * page mapping, depending on the host VA alignment
@@ -1053,6 +1109,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                         * mapping
                         */
                        va_range = ctx->va_range[HL_VA_RANGE_TYPE_HOST_HUGE];
+                       va_range_type = HL_VA_RANGE_TYPE_HOST_HUGE;
                        va_block_align = huge_page_size;
                }
        } else {
@@ -1072,12 +1129,13 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
                spin_unlock(&vm->idr_lock);
 
-               vm_type = (enum vm_type_t *) phys_pg_pack;
+               vm_type = (enum vm_type *) phys_pg_pack;
 
                hint_addr = args->map_device.hint_addr;
 
                /* DRAM VA alignment is the same as the MMU page size */
                va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
+               va_range_type = HL_VA_RANGE_TYPE_DRAM;
                va_block_align = hdev->asic_prop.dmmu.page_size;
        }
 
@@ -1100,8 +1158,23 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                goto hnode_err;
        }
 
+       if (hint_addr && phys_pg_pack->offset) {
+               if (args->flags & HL_MEM_FORCE_HINT) {
+                       /* Fail if hint must be respected but it can't be */
+                       dev_err(hdev->dev,
+                               "Hint address 0x%llx cannot be respected because source memory is not aligned 0x%x\n",
+                               hint_addr, phys_pg_pack->offset);
+                       rc = -EINVAL;
+                       goto va_block_err;
+               }
+               dev_dbg(hdev->dev,
+                       "Hint address 0x%llx will be ignored because source memory is not aligned 0x%x\n",
+                       hint_addr, phys_pg_pack->offset);
+       }
+
        ret_vaddr = get_va_block(hdev, va_range, phys_pg_pack->total_size,
-                                       hint_addr, va_block_align);
+                                       hint_addr, va_block_align,
+                                       va_range_type, args->flags);
        if (!ret_vaddr) {
                dev_err(hdev->dev, "no available va block for handle %u\n",
                                handle);
@@ -1181,17 +1254,19 @@ init_page_pack_err:
 static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                                bool ctx_free)
 {
-       struct hl_device *hdev = ctx->hdev;
-       struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
+       u64 vaddr = args->unmap.device_virt_addr;
        struct hl_vm_hash_node *hnode = NULL;
+       struct asic_fixed_properties *prop;
+       struct hl_device *hdev = ctx->hdev;
        struct hl_userptr *userptr = NULL;
        struct hl_va_range *va_range;
-       u64 vaddr = args->unmap.device_virt_addr;
-       enum vm_type_t *vm_type;
+       enum vm_type *vm_type;
        bool is_userptr;
        int rc = 0;
 
+       prop = &hdev->asic_prop;
+
        /* protect from double entrance */
        mutex_lock(&ctx->mem_hash_lock);
        hash_for_each_possible(ctx->mem_hash, hnode, node, (unsigned long)vaddr)
@@ -1214,8 +1289,9 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        if (*vm_type == VM_TYPE_USERPTR) {
                is_userptr = true;
                userptr = hnode->ptr;
-               rc = init_phys_pg_pack_from_userptr(ctx, userptr,
-                                                       &phys_pg_pack);
+
+               rc = init_phys_pg_pack_from_userptr(ctx, userptr, &phys_pg_pack,
+                                                       false);
                if (rc) {
                        dev_err(hdev->dev,
                                "unable to init page pack for vaddr 0x%llx\n",
@@ -1299,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
        kfree(hnode);
 
        if (is_userptr) {
-               rc = free_phys_pg_pack(hdev, phys_pg_pack);
+               free_phys_pg_pack(hdev, phys_pg_pack);
                dma_unmap_host_va(hdev, userptr);
        }
 
@@ -1669,6 +1745,7 @@ int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
                return -EINVAL;
        }
 
+       userptr->pid = current->pid;
        userptr->sgt = kzalloc(sizeof(*userptr->sgt), GFP_KERNEL);
        if (!userptr->sgt)
                return -ENOMEM;
@@ -2033,7 +2110,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
         * another side effect error
         */
        if (!hdev->hard_reset_pending && !hash_empty(ctx->mem_hash))
-               dev_notice(hdev->dev,
+               dev_dbg(hdev->dev,
                        "user released device without removing its memory mappings\n");
 
        hash_for_each_safe(ctx->mem_hash, i, tmp_node, hnode, node) {
index c5e93ff..0f536f7 100644 (file)
@@ -470,13 +470,13 @@ static void hl_mmu_v1_fini(struct hl_device *hdev)
        if (!ZERO_OR_NULL_PTR(hdev->mmu_priv.hr.mmu_shadow_hop0)) {
                kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0);
                gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool);
-       }
 
-       /* Make sure that if we arrive here again without init was called we
-        * won't cause kernel panic. This can happen for example if we fail
-        * during hard reset code at certain points
-        */
-       hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+               /* Make sure that if we arrive here again without init having
+                * been called, we won't cause a kernel panic. This can happen,
+                * for example, if we fail during hard reset at certain points.
+                */
+               hdev->mmu_priv.dr.mmu_shadow_hop0 = NULL;
+       }
 }
 
 /**
index d5bedf5..0b5366c 100644 (file)
@@ -436,6 +436,8 @@ int hl_pci_init(struct hl_device *hdev)
                goto unmap_pci_bars;
        }
 
+       dma_set_max_seg_size(&pdev->dev, U32_MAX);
+
        return 0;
 
 unmap_pci_bars:
diff --git a/drivers/misc/habanalabs/common/state_dump.c b/drivers/misc/habanalabs/common/state_dump.c
new file mode 100644 (file)
index 0000000..7472690
--- /dev/null
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2021 HabanaLabs, Ltd.
+ * All Rights Reserved.
+ */
+
+#include <linux/vmalloc.h>
+#include <uapi/misc/habanalabs.h>
+#include "habanalabs.h"
+
+/**
+ * hl_format_as_binary - helper function, format an integer as binary
+ *                       using supplied scratch buffer
+ * @buf: the buffer to use
+ * @buf_len: buffer capacity
+ * @n: number to format
+ *
+ * Returns pointer to buffer
+ */
+char *hl_format_as_binary(char *buf, size_t buf_len, u32 n)
+{
+       int i;
+       u32 bit;
+       bool leading0 = true;
+       char *wrptr = buf;
+
+       if (buf_len > 0 && buf_len < 3) {
+               *wrptr = '\0';
+               return buf;
+       }
+
+       wrptr[0] = '0';
+       wrptr[1] = 'b';
+       wrptr += 2;
+       /* Remove 3 characters from length for '0b' and '\0' termination */
+       buf_len -= 3;
+
+       for (i = 0; i < sizeof(n) * BITS_PER_BYTE && buf_len; ++i, n <<= 1) {
+               /* Writing bit calculation in one line would cause a false
+                * positive static code analysis error, so splitting.
+                */
+               bit = n & (1 << (sizeof(n) * BITS_PER_BYTE - 1));
+               bit = !!bit;
+               leading0 &= !bit;
+               if (!leading0) {
+                       *wrptr = '0' + bit;
+                       ++wrptr;
+               }
+       }
+
+       *wrptr = '\0';
+
+       return buf;
+}
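A minimal usage sketch (illustrative, not part of the patch), sizing the scratch buffer with the BIN_REG_STRING_SIZE helper that gaudi.c defines further down:

	char scratch[BIN_REG_STRING_SIZE];

	/* prints "mask: 0b1000111111111110" for the TPC interrupt mask value */
	pr_info("mask: %s\n", hl_format_as_binary(scratch, sizeof(scratch), 0x8FFE));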
+
+/**
+ * resize_to_fit - helper function, resize buffer to fit given amount of data
+ * @buf: destination buffer double pointer
+ * @size: pointer to the size container
+ * @desired_size: size the buffer must contain
+ *
+ * Returns 0 if the buffer already fits, 1 if it was resized, or a negative
+ * error code on failure. On success, the size of the buffer is at least
+ * desired_size. The buffer is allocated via vmalloc and must be freed with
+ * vfree.
+ */
+static int resize_to_fit(char **buf, size_t *size, size_t desired_size)
+{
+       char *resized_buf;
+       size_t new_size;
+
+       if (*size >= desired_size)
+               return 0;
+
+       /* Not enough space to print all, have to resize */
+       new_size = max_t(size_t, PAGE_SIZE, round_up(desired_size, PAGE_SIZE));
+       resized_buf = vmalloc(new_size);
+       if (!resized_buf)
+               return -ENOMEM;
+       memcpy(resized_buf, *buf, *size);
+       vfree(*buf);
+       *buf = resized_buf;
+       *size = new_size;
+
+       return 1;
+}
+
+/**
+ * hl_snprintf_resize() - print formatted data to buffer, resize as needed
+ * @buf: buffer double pointer, to be written to and resized, must be either
+ *       NULL or allocated with vmalloc.
+ * @size: current size of the buffer
+ * @offset: current offset to write to
+ * @format: format of the data
+ *
+ * This function will write formatted data into the buffer. If buffer is not
+ * large enough, it will be resized using vmalloc. Size may be modified if the
+ * buffer was resized; offset will be advanced by the number of bytes written,
+ * not including the terminating NUL character.
+ *
+ * Returns 0 on success or error code on failure
+ *
+ * Note that the buffer has to be manually released using vfree.
+ */
+int hl_snprintf_resize(char **buf, size_t *size, size_t *offset,
+                          const char *format, ...)
+{
+       va_list args;
+       size_t length;
+       int rc;
+
+       if (*buf == NULL && (*size != 0 || *offset != 0))
+               return -EINVAL;
+
+       va_start(args, format);
+       length = vsnprintf(*buf + *offset, *size - *offset, format, args);
+       va_end(args);
+
+       rc = resize_to_fit(buf, size, *offset + length + 1);
+       if (rc < 0)
+               return rc;
+       else if (rc > 0) {
+               /* Resize was needed, write again */
+               va_start(args, format);
+               length = vsnprintf(*buf + *offset, *size - *offset, format,
+                                  args);
+               va_end(args);
+       }
+
+       *offset += length;
+
+       return 0;
+}
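A sketch of the intended call pattern (illustrative, not from the patch): start from a NULL buffer, let each call grow it as needed, and release it with vfree() when done:

	char *report = NULL;
	size_t size = 0, offset = 0;
	int rc;

	rc = hl_snprintf_resize(&report, &size, &offset, "engine: %s%u\n", "TPC", 3);
	if (!rc)
		rc = hl_snprintf_resize(&report, &size, &offset, "value: %u\n", 42);
	if (rc)
		vfree(report);
	/* on success the caller owns 'report' and must eventually vfree() it */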
+
+/**
+ * hl_sync_engine_to_string - convert engine type enum to string literal
+ * @engine_type: engine type (TPC/MME/DMA)
+ *
+ * Return the resolved string literal
+ */
+const char *hl_sync_engine_to_string(enum hl_sync_engine_type engine_type)
+{
+       switch (engine_type) {
+       case ENGINE_DMA:
+               return "DMA";
+       case ENGINE_MME:
+               return "MME";
+       case ENGINE_TPC:
+               return "TPC";
+       }
+       return "Invalid Engine Type";
+}
+
+/**
+ * hl_print_resize_sync_engine - helper function, format engine name and ID
+ * using hl_snprintf_resize
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ * @engine_type: engine type (TPC/MME/DMA)
+ * @engine_id: engine numerical id
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_print_resize_sync_engine(char **buf, size_t *size, size_t *offset,
+                               enum hl_sync_engine_type engine_type,
+                               u32 engine_id)
+{
+       return hl_snprintf_resize(buf, size, offset, "%s%u",
+                       hl_sync_engine_to_string(engine_type), engine_id);
+}
+
+/**
+ * hl_state_dump_get_sync_name - transform sync object id to name if available
+ * @hdev: pointer to the device
+ * @sync_id: sync object id
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * sync objects are named.
+ */
+const char *hl_state_dump_get_sync_name(struct hl_device *hdev, u32 sync_id)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct hl_hw_obj_name_entry *entry;
+
+       hash_for_each_possible(sds->so_id_to_str_tb, entry,
+                               node, sync_id)
+               if (sync_id == entry->id)
+                       return entry->name;
+
+       return NULL;
+}
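The table walked here is presumably populated from per-ASIC name arrays such as gaudi_so_id_to_str[] further down in this series; a hedged sketch of registering one entry, assuming the hashtable node member is called node as in the lookup above:

	struct hl_hw_obj_name_entry *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->id = 4;
	entry->name = "SYNC_OBJ_FIRST_COMPUTE_FINISH";
	hash_add(sds->so_id_to_str_tb, &entry->node, entry->id);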
+
+/**
+ * hl_state_dump_get_monitor_name - transform monitor object dump to monitor
+ * name if available
+ * @hdev: pointer to the device
+ * @mon: monitor state dump
+ *
+ * Returns a name literal or NULL if not resolved.
+ * Note: returning NULL shall not be considered as a failure, as not all
+ * monitors are named.
+ */
+const char *hl_state_dump_get_monitor_name(struct hl_device *hdev,
+                                       struct hl_mon_state_dump *mon)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct hl_hw_obj_name_entry *entry;
+
+       hash_for_each_possible(sds->monitor_id_to_str_tb,
+                               entry, node, mon->id)
+               if (mon->id == entry->id)
+                       return entry->name;
+
+       return NULL;
+}
+
+/**
+ * hl_state_dump_free_sync_to_engine_map - free sync object to engine map
+ * @map: sync object to engine map
+ *
+ * Note: generic free implementation, the allocation is implemented per ASIC.
+ */
+void hl_state_dump_free_sync_to_engine_map(struct hl_sync_to_engine_map *map)
+{
+       struct hl_sync_to_engine_map_entry *entry;
+       struct hlist_node *tmp_node;
+       int i;
+
+       hash_for_each_safe(map->tb, i, tmp_node, entry, node) {
+               hash_del(&entry->node);
+               kfree(entry);
+       }
+}
+
+/**
+ * hl_state_dump_get_sync_to_engine - transform sync_id to
+ * hl_sync_to_engine_map_entry if available for current id
+ * @map: sync object to engine map
+ * @sync_id: sync object id
+ *
+ * Returns the translation entry if found or NULL if not.
+ * Note: a NULL return shall not be considered a failure, as the map does not
+ * cover all possible sync ids; it is best effort.
+ */
+static struct hl_sync_to_engine_map_entry *
+hl_state_dump_get_sync_to_engine(struct hl_sync_to_engine_map *map, u32 sync_id)
+{
+       struct hl_sync_to_engine_map_entry *entry;
+
+       hash_for_each_possible(map->tb, entry, node, sync_id)
+               if (entry->sync_id == sync_id)
+                       return entry;
+       return NULL;
+}
+
+/**
+ * hl_state_dump_read_sync_objects - read sync objects array
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ *
+ * Returns array of size SP_SYNC_OBJ_AMOUNT on success or NULL on failure
+ */
+static u32 *hl_state_dump_read_sync_objects(struct hl_device *hdev, u32 index)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       u32 *sync_objects;
+       s64 base_addr; /* Base addr can be negative */
+       int i;
+
+       base_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
+                       sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
+
+       sync_objects = vmalloc(sds->props[SP_SYNC_OBJ_AMOUNT] * sizeof(u32));
+       if (!sync_objects)
+               return NULL;
+
+       for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i)
+               sync_objects[i] = RREG32(base_addr + i * sizeof(u32));
+
+       return sync_objects;
+}
+
+/**
+ * hl_state_dump_free_sync_objects - free sync objects array allocated by
+ * hl_state_dump_read_sync_objects
+ * @sync_objects: sync objects array
+ */
+static void hl_state_dump_free_sync_objects(u32 *sync_objects)
+{
+       vfree(sync_objects);
+}
+
+
+/**
+ * hl_state_dump_print_syncs_single_block - print active sync objects on a
+ * single block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ * @map: sync engines names map
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int
+hl_state_dump_print_syncs_single_block(struct hl_device *hdev, u32 index,
+                               char **buf, size_t *size, size_t *offset,
+                               struct hl_sync_to_engine_map *map)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       const char *sync_name;
+       u32 *sync_objects = NULL;
+       int rc = 0, i;
+
+       if (sds->sync_namager_names) {
+               rc = hl_snprintf_resize(
+                       buf, size, offset, "%s\n",
+                       sds->sync_namager_names[index]);
+               if (rc)
+                       goto out;
+       }
+
+       sync_objects = hl_state_dump_read_sync_objects(hdev, index);
+       if (!sync_objects) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < sds->props[SP_SYNC_OBJ_AMOUNT]; ++i) {
+               struct hl_sync_to_engine_map_entry *entry;
+               u64 sync_object_addr;
+
+               if (!sync_objects[i])
+                       continue;
+
+               sync_object_addr = sds->props[SP_SYNC_OBJ_BASE_ADDR] +
+                               sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index +
+                               i * sizeof(u32);
+
+               rc = hl_snprintf_resize(buf, size, offset, "sync id: %u", i);
+               if (rc)
+                       goto free_sync_objects;
+               sync_name = hl_state_dump_get_sync_name(hdev, i);
+               if (sync_name) {
+                       rc = hl_snprintf_resize(buf, size, offset, " %s",
+                                               sync_name);
+                       if (rc)
+                               goto free_sync_objects;
+               }
+               rc = hl_snprintf_resize(buf, size, offset, ", value: %u",
+                                       sync_objects[i]);
+               if (rc)
+                       goto free_sync_objects;
+
+               /* Append engine string */
+               entry = hl_state_dump_get_sync_to_engine(map,
+                       (u32)sync_object_addr);
+               if (entry) {
+                       rc = hl_snprintf_resize(buf, size, offset,
+                                               ", Engine: ");
+                       if (rc)
+                               goto free_sync_objects;
+                       rc = hl_print_resize_sync_engine(buf, size, offset,
+                                               entry->engine_type,
+                                               entry->engine_id);
+                       if (rc)
+                               goto free_sync_objects;
+               }
+
+               rc = hl_snprintf_resize(buf, size, offset, "\n");
+               if (rc)
+                       goto free_sync_objects;
+       }
+
+free_sync_objects:
+       hl_state_dump_free_sync_objects(sync_objects);
+out:
+       return rc;
+}
+
+/**
+ * hl_state_dump_print_syncs - print active sync objects
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_syncs(struct hl_device *hdev,
+                                       char **buf, size_t *size,
+                                       size_t *offset)
+
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct hl_sync_to_engine_map *map;
+       u32 index;
+       int rc = 0;
+
+       map = kzalloc(sizeof(*map), GFP_KERNEL);
+       if (!map)
+               return -ENOMEM;
+
+       rc = sds->funcs.gen_sync_to_engine_map(hdev, map);
+       if (rc)
+               goto free_map_mem;
+
+       rc = hl_snprintf_resize(buf, size, offset, "Non zero sync objects:\n");
+       if (rc)
+               goto out;
+
+       if (sds->sync_namager_names) {
+               for (index = 0; sds->sync_namager_names[index]; ++index) {
+                       rc = hl_state_dump_print_syncs_single_block(
+                               hdev, index, buf, size, offset, map);
+                       if (rc)
+                               goto out;
+               }
+       } else {
+               for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
+                       rc = hl_state_dump_print_syncs_single_block(
+                               hdev, index, buf, size, offset, map);
+                       if (rc)
+                               goto out;
+               }
+       }
+
+out:
+       hl_state_dump_free_sync_to_engine_map(map);
+free_map_mem:
+       kfree(map);
+
+       return rc;
+}
+
+/**
+ * hl_state_dump_alloc_read_sm_block_monitors - read monitors for a specific
+ * block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ *
+ * Returns an array of monitor data of size SP_MONITORS_AMOUNT or NULL
+ * on error
+ */
+static struct hl_mon_state_dump *
+hl_state_dump_alloc_read_sm_block_monitors(struct hl_device *hdev, u32 index)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct hl_mon_state_dump *monitors;
+       s64 base_addr; /* Base addr can be negative */
+       int i;
+
+       monitors = vmalloc(sds->props[SP_MONITORS_AMOUNT] *
+                          sizeof(struct hl_mon_state_dump));
+       if (!monitors)
+               return NULL;
+
+       base_addr = sds->props[SP_NEXT_SYNC_OBJ_ADDR] * index;
+
+       for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
+               monitors[i].id = i;
+               monitors[i].wr_addr_low =
+                       RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_LOW] +
+                               i * sizeof(u32));
+
+               monitors[i].wr_addr_high =
+                       RREG32(base_addr + sds->props[SP_MON_OBJ_WR_ADDR_HIGH] +
+                               i * sizeof(u32));
+
+               monitors[i].wr_data =
+                       RREG32(base_addr + sds->props[SP_MON_OBJ_WR_DATA] +
+                               i * sizeof(u32));
+
+               monitors[i].arm_data =
+                       RREG32(base_addr + sds->props[SP_MON_OBJ_ARM_DATA] +
+                               i * sizeof(u32));
+
+               monitors[i].status =
+                       RREG32(base_addr + sds->props[SP_MON_OBJ_STATUS] +
+                               i * sizeof(u32));
+       }
+
+       return monitors;
+}
+
+/**
+ * hl_state_dump_free_monitors - free the monitors structure
+ * @monitors: monitors array created with
+ *            hl_state_dump_alloc_read_sm_block_monitors
+ */
+static void hl_state_dump_free_monitors(struct hl_mon_state_dump *monitors)
+{
+       vfree(monitors);
+}
+
+/**
+ * hl_state_dump_print_monitors_single_block - print active monitors on a
+ * single block
+ * @hdev: pointer to the device
+ * @index: sync manager block index starting with E_N
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_monitors_single_block(struct hl_device *hdev,
+                                               u32 index,
+                                               char **buf, size_t *size,
+                                               size_t *offset)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct hl_mon_state_dump *monitors = NULL;
+       int rc = 0, i;
+
+       if (sds->sync_namager_names) {
+               rc = hl_snprintf_resize(
+                       buf, size, offset, "%s\n",
+                       sds->sync_namager_names[index]);
+               if (rc)
+                       goto out;
+       }
+
+       monitors = hl_state_dump_alloc_read_sm_block_monitors(hdev, index);
+       if (!monitors) {
+               rc = -ENOMEM;
+               goto out;
+       }
+
+       for (i = 0; i < sds->props[SP_MONITORS_AMOUNT]; ++i) {
+               if (!(sds->funcs.monitor_valid(&monitors[i])))
+                       continue;
+
+               /* Monitor is valid, dump it */
+               rc = sds->funcs.print_single_monitor(buf, size, offset, hdev,
+                                                       &monitors[i]);
+               if (rc)
+                       goto free_monitors;
+
+               hl_snprintf_resize(buf, size, offset, "\n");
+       }
+
+free_monitors:
+       hl_state_dump_free_monitors(monitors);
+out:
+       return rc;
+}
+
+/**
+ * hl_state_dump_print_monitors - print active monitors
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_monitors(struct hl_device *hdev,
+                                       char **buf, size_t *size,
+                                       size_t *offset)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       u32 index;
+       int rc = 0;
+
+       rc = hl_snprintf_resize(buf, size, offset,
+               "Valid (armed) monitor objects:\n");
+       if (rc)
+               goto out;
+
+       if (sds->sync_namager_names) {
+               for (index = 0; sds->sync_namager_names[index]; ++index) {
+                       rc = hl_state_dump_print_monitors_single_block(
+                               hdev, index, buf, size, offset);
+                       if (rc)
+                               goto out;
+               }
+       } else {
+               for (index = 0; index < sds->props[SP_NUM_CORES]; ++index) {
+                       rc = hl_state_dump_print_monitors_single_block(
+                               hdev, index, buf, size, offset);
+                       if (rc)
+                               goto out;
+               }
+       }
+
+out:
+       return rc;
+}
+
+/**
+ * hl_state_dump_print_engine_fences - print active fences for a specific
+ * engine
+ * @hdev: pointer to the device
+ * @engine_type: engine type to use
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int
+hl_state_dump_print_engine_fences(struct hl_device *hdev,
+                                 enum hl_sync_engine_type engine_type,
+                                 char **buf, size_t *size, size_t *offset)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       int rc = 0, i, n_fences;
+       u64 base_addr, next_fence;
+
+       switch (engine_type) {
+       case ENGINE_TPC:
+               n_fences = sds->props[SP_NUM_OF_TPC_ENGINES];
+               base_addr = sds->props[SP_TPC0_CMDQ];
+               next_fence = sds->props[SP_NEXT_TPC];
+               break;
+       case ENGINE_MME:
+               n_fences = sds->props[SP_NUM_OF_MME_ENGINES];
+               base_addr = sds->props[SP_MME_CMDQ];
+               next_fence = sds->props[SP_NEXT_MME];
+               break;
+       case ENGINE_DMA:
+               n_fences = sds->props[SP_NUM_OF_DMA_ENGINES];
+               base_addr = sds->props[SP_DMA_CMDQ];
+               next_fence = sds->props[SP_DMA_QUEUES_OFFSET];
+               break;
+       default:
+               return -EINVAL;
+       }
+       for (i = 0; i < n_fences; ++i) {
+               rc = sds->funcs.print_fences_single_engine(
+                       hdev,
+                       base_addr + next_fence * i +
+                               sds->props[SP_FENCE0_CNT_OFFSET],
+                       base_addr + next_fence * i +
+                               sds->props[SP_CP_STS_OFFSET],
+                       engine_type, i, buf, size, offset);
+               if (rc)
+                       goto out;
+       }
+out:
+       return rc;
+}
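With the Gaudi properties added later in this series, the fence counter polled for TPC engine i resolves as below (illustrative sketch, assuming every QMAN block shares the same internal register layout):

	u64 fence_cnt_addr = sds->props[SP_TPC0_CMDQ] +
			     sds->props[SP_NEXT_TPC] * i +
			     sds->props[SP_FENCE0_CNT_OFFSET];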
+
+/**
+ * hl_state_dump_print_fences - print active fences
+ * @hdev: pointer to the device
+ * @buf: destination buffer double pointer to be used with hl_snprintf_resize
+ * @size: pointer to the size container
+ * @offset: pointer to the offset container
+ *
+ * Returns 0 on success or error code on failure
+ */
+static int hl_state_dump_print_fences(struct hl_device *hdev, char **buf,
+                                     size_t *size, size_t *offset)
+{
+       int rc = 0;
+
+       rc = hl_snprintf_resize(buf, size, offset, "Valid (armed) fences:\n");
+       if (rc)
+               goto out;
+
+       rc = hl_state_dump_print_engine_fences(hdev, ENGINE_TPC, buf, size, offset);
+       if (rc)
+               goto out;
+
+       rc = hl_state_dump_print_engine_fences(hdev, ENGINE_MME, buf, size, offset);
+       if (rc)
+               goto out;
+
+       rc = hl_state_dump_print_engine_fences(hdev, ENGINE_DMA, buf, size, offset);
+       if (rc)
+               goto out;
+
+out:
+       return rc;
+}
+
+/**
+ * hl_state_dump() - dump system state
+ * @hdev: pointer to device structure
+ */
+int hl_state_dump(struct hl_device *hdev)
+{
+       char *buf = NULL;
+       size_t offset = 0, size = 0;
+       int rc;
+
+       rc = hl_snprintf_resize(&buf, &size, &offset,
+                               "Timestamp taken on: %llu\n\n",
+                               ktime_to_ns(ktime_get()));
+       if (rc)
+               goto err;
+
+       rc = hl_state_dump_print_syncs(hdev, &buf, &size, &offset);
+       if (rc)
+               goto err;
+
+       hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+       rc = hl_state_dump_print_monitors(hdev, &buf, &size, &offset);
+       if (rc)
+               goto err;
+
+       hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+       rc = hl_state_dump_print_fences(hdev, &buf, &size, &offset);
+       if (rc)
+               goto err;
+
+       hl_snprintf_resize(&buf, &size, &offset, "\n");
+
+       hl_debugfs_set_state_dump(hdev, buf, size);
+
+       return 0;
+err:
+       vfree(buf);
+       return rc;
+}
index db72df2..34f9f27 100644 (file)
@@ -9,8 +9,7 @@
 
 #include <linux/pci.h>
 
-long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
-                                                               bool curr)
+long hl_get_frequency(struct hl_device *hdev, u32 pll_index, bool curr)
 {
        struct cpucp_packet pkt;
        u32 used_pll_idx;
@@ -44,8 +43,7 @@ long hl_get_frequency(struct hl_device *hdev, u32 pll_index,
        return (long) result;
 }
 
-void hl_set_frequency(struct hl_device *hdev, u32 pll_index,
-                                                               u64 freq)
+void hl_set_frequency(struct hl_device *hdev, u32 pll_index, u64 freq)
 {
        struct cpucp_packet pkt;
        u32 used_pll_idx;
@@ -285,16 +283,12 @@ static ssize_t status_show(struct device *dev, struct device_attribute *attr,
                                char *buf)
 {
        struct hl_device *hdev = dev_get_drvdata(dev);
-       char *str;
+       char str[HL_STR_MAX];
 
-       if (atomic_read(&hdev->in_reset))
-               str = "In reset";
-       else if (hdev->disabled)
-               str = "Malfunction";
-       else if (hdev->needs_reset)
-               str = "Needs Reset";
-       else
-               str = "Operational";
+       strscpy(str, hdev->status[hl_device_status(hdev)], HL_STR_MAX);
+
+       /* use uppercase for backward compatibility */
+       str[0] = 'A' + (str[0] - 'a');
 
        return sprintf(buf, "%s\n", str);
 }
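A standalone userspace sketch (illustrative, not driver code) of the copy-and-capitalize pattern status_show() now uses; the HL_STR_MAX value here is an assumption:

	#include <stdio.h>

	#define HL_STR_MAX 32	/* assumed size; the driver defines the real value */

	int main(void)
	{
		const char *status = "operational";	/* stands in for hdev->status[...] */
		char str[HL_STR_MAX];

		snprintf(str, sizeof(str), "%s", status);
		str[0] = 'A' + (str[0] - 'a');		/* "operational" -> "Operational" */

		printf("%s\n", str);
		return 0;
	}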
index aa8a0ca..383865b 100644 (file)
@@ -76,7 +76,7 @@
 #define GAUDI_PLDM_MMU_TIMEOUT_USEC    (MMU_CONFIG_TIMEOUT_USEC * 100)
 #define GAUDI_PLDM_QMAN0_TIMEOUT_USEC  (HL_DEVICE_TIMEOUT_USEC * 30)
 #define GAUDI_PLDM_TPC_KERNEL_WAIT_USEC        (HL_DEVICE_TIMEOUT_USEC * 30)
-#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC        1000000         /* 1s */
+#define GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC        4000000         /* 4s */
 #define GAUDI_MSG_TO_CPU_TIMEOUT_USEC  4000000         /* 4s */
 #define GAUDI_WAIT_FOR_BL_TIMEOUT_USEC 15000000        /* 15s */
 
 
 #define GAUDI_PLL_MAX 10
 
+#define BIN_REG_STRING_SIZE    sizeof("0b10101010101010101010101010101010")
+
+#define MONITOR_SOB_STRING_SIZE                256
+
+static u32 gaudi_stream_master[GAUDI_STREAM_MASTER_ARR_SIZE] = {
+       GAUDI_QUEUE_ID_DMA_0_0,
+       GAUDI_QUEUE_ID_DMA_0_1,
+       GAUDI_QUEUE_ID_DMA_0_2,
+       GAUDI_QUEUE_ID_DMA_0_3,
+       GAUDI_QUEUE_ID_DMA_1_0,
+       GAUDI_QUEUE_ID_DMA_1_1,
+       GAUDI_QUEUE_ID_DMA_1_2,
+       GAUDI_QUEUE_ID_DMA_1_3
+};
+
 static const char gaudi_irq_name[GAUDI_MSI_ENTRIES][GAUDI_MAX_STRING_LEN] = {
                "gaudi cq 0_0", "gaudi cq 0_1", "gaudi cq 0_2", "gaudi cq 0_3",
                "gaudi cq 1_0", "gaudi cq 1_1", "gaudi cq 1_2", "gaudi cq 1_3",
@@ -348,6 +363,97 @@ static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
        QUEUE_TYPE_INT, /* GAUDI_QUEUE_ID_NIC_9_3 */
 };
 
+static struct hl_hw_obj_name_entry gaudi_so_id_to_str[] = {
+       { .id = 0,  .name = "SYNC_OBJ_DMA_DOWN_FEEDBACK" },
+       { .id = 1,  .name = "SYNC_OBJ_DMA_UP_FEEDBACK" },
+       { .id = 2,  .name = "SYNC_OBJ_DMA_STATIC_DRAM_SRAM_FEEDBACK" },
+       { .id = 3,  .name = "SYNC_OBJ_DMA_SRAM_DRAM_FEEDBACK" },
+       { .id = 4,  .name = "SYNC_OBJ_FIRST_COMPUTE_FINISH" },
+       { .id = 5,  .name = "SYNC_OBJ_HOST_DRAM_DONE" },
+       { .id = 6,  .name = "SYNC_OBJ_DBG_CTR_DEPRECATED" },
+       { .id = 7,  .name = "SYNC_OBJ_DMA_ACTIVATIONS_DRAM_SRAM_FEEDBACK" },
+       { .id = 8,  .name = "SYNC_OBJ_ENGINE_SEM_MME_0" },
+       { .id = 9,  .name = "SYNC_OBJ_ENGINE_SEM_MME_1" },
+       { .id = 10, .name = "SYNC_OBJ_ENGINE_SEM_TPC_0" },
+       { .id = 11, .name = "SYNC_OBJ_ENGINE_SEM_TPC_1" },
+       { .id = 12, .name = "SYNC_OBJ_ENGINE_SEM_TPC_2" },
+       { .id = 13, .name = "SYNC_OBJ_ENGINE_SEM_TPC_3" },
+       { .id = 14, .name = "SYNC_OBJ_ENGINE_SEM_TPC_4" },
+       { .id = 15, .name = "SYNC_OBJ_ENGINE_SEM_TPC_5" },
+       { .id = 16, .name = "SYNC_OBJ_ENGINE_SEM_TPC_6" },
+       { .id = 17, .name = "SYNC_OBJ_ENGINE_SEM_TPC_7" },
+       { .id = 18, .name = "SYNC_OBJ_ENGINE_SEM_DMA_1" },
+       { .id = 19, .name = "SYNC_OBJ_ENGINE_SEM_DMA_2" },
+       { .id = 20, .name = "SYNC_OBJ_ENGINE_SEM_DMA_3" },
+       { .id = 21, .name = "SYNC_OBJ_ENGINE_SEM_DMA_4" },
+       { .id = 22, .name = "SYNC_OBJ_ENGINE_SEM_DMA_5" },
+       { .id = 23, .name = "SYNC_OBJ_ENGINE_SEM_DMA_6" },
+       { .id = 24, .name = "SYNC_OBJ_ENGINE_SEM_DMA_7" },
+       { .id = 25, .name = "SYNC_OBJ_DBG_CTR_0" },
+       { .id = 26, .name = "SYNC_OBJ_DBG_CTR_1" },
+};
+
+static struct hl_hw_obj_name_entry gaudi_monitor_id_to_str[] = {
+       { .id = 200, .name = "MON_OBJ_DMA_DOWN_FEEDBACK_RESET" },
+       { .id = 201, .name = "MON_OBJ_DMA_UP_FEADBACK_RESET" },
+       { .id = 203, .name = "MON_OBJ_DRAM_TO_SRAM_QUEUE_FENCE" },
+       { .id = 204, .name = "MON_OBJ_TPC_0_CLK_GATE" },
+       { .id = 205, .name = "MON_OBJ_TPC_1_CLK_GATE" },
+       { .id = 206, .name = "MON_OBJ_TPC_2_CLK_GATE" },
+       { .id = 207, .name = "MON_OBJ_TPC_3_CLK_GATE" },
+       { .id = 208, .name = "MON_OBJ_TPC_4_CLK_GATE" },
+       { .id = 209, .name = "MON_OBJ_TPC_5_CLK_GATE" },
+       { .id = 210, .name = "MON_OBJ_TPC_6_CLK_GATE" },
+       { .id = 211, .name = "MON_OBJ_TPC_7_CLK_GATE" },
+};
+
+static s64 gaudi_state_dump_specs_props[] = {
+       [SP_SYNC_OBJ_BASE_ADDR] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0,
+       [SP_NEXT_SYNC_OBJ_ADDR] = NEXT_SYNC_OBJ_ADDR_INTERVAL,
+       [SP_SYNC_OBJ_AMOUNT] = NUM_OF_SOB_IN_BLOCK,
+       [SP_MON_OBJ_WR_ADDR_LOW] =
+               mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0,
+       [SP_MON_OBJ_WR_ADDR_HIGH] =
+               mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0,
+       [SP_MON_OBJ_WR_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0,
+       [SP_MON_OBJ_ARM_DATA] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0,
+       [SP_MON_OBJ_STATUS] = mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0,
+       [SP_MONITORS_AMOUNT] = NUM_OF_MONITORS_IN_BLOCK,
+       [SP_TPC0_CMDQ] = mmTPC0_QM_GLBL_CFG0,
+       [SP_TPC0_CFG_SO] = mmTPC0_CFG_QM_SYNC_OBJECT_ADDR,
+       [SP_NEXT_TPC] = mmTPC1_QM_GLBL_CFG0 - mmTPC0_QM_GLBL_CFG0,
+       [SP_MME_CMDQ] = mmMME0_QM_GLBL_CFG0,
+       [SP_MME_CFG_SO] = mmMME0_CTRL_ARCH_DESC_SYNC_OBJECT_ADDR_LOW_LOCAL,
+       [SP_NEXT_MME] = mmMME2_QM_GLBL_CFG0 - mmMME0_QM_GLBL_CFG0,
+       [SP_DMA_CMDQ] = mmDMA0_QM_GLBL_CFG0,
+       [SP_DMA_CFG_SO] = mmDMA0_CORE_WR_COMP_ADDR_LO,
+       [SP_DMA_QUEUES_OFFSET] = mmDMA1_QM_GLBL_CFG0 - mmDMA0_QM_GLBL_CFG0,
+       [SP_NUM_OF_MME_ENGINES] = NUM_OF_MME_ENGINES,
+       [SP_SUB_MME_ENG_NUM] = NUM_OF_MME_SUB_ENGINES,
+       [SP_NUM_OF_DMA_ENGINES] = NUM_OF_DMA_ENGINES,
+       [SP_NUM_OF_TPC_ENGINES] = NUM_OF_TPC_ENGINES,
+       [SP_ENGINE_NUM_OF_QUEUES] = NUM_OF_QUEUES,
+       [SP_ENGINE_NUM_OF_STREAMS] = NUM_OF_STREAMS,
+       [SP_ENGINE_NUM_OF_FENCES] = NUM_OF_FENCES,
+       [SP_FENCE0_CNT_OFFSET] =
+               mmDMA0_QM_CP_FENCE0_CNT_0 - mmDMA0_QM_GLBL_CFG0,
+       [SP_FENCE0_RDATA_OFFSET] =
+               mmDMA0_QM_CP_FENCE0_RDATA_0 - mmDMA0_QM_GLBL_CFG0,
+       [SP_CP_STS_OFFSET] = mmDMA0_QM_CP_STS_0 - mmDMA0_QM_GLBL_CFG0,
+       [SP_NUM_CORES] = 1,
+};
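As consumed by hl_state_dump_read_sync_objects() in state_dump.c, these properties place each sync manager block at a fixed stride from the first; a sketch of the register address of SOB j in block index (illustrative only):

	s64 sob_addr = gaudi_state_dump_specs_props[SP_SYNC_OBJ_BASE_ADDR] +
		       gaudi_state_dump_specs_props[SP_NEXT_SYNC_OBJ_ADDR] * index +
		       j * sizeof(u32);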
+
+/* The order here is opposite to the order of the indexing in the h/w.
+ * i.e. SYNC_MGR_W_S is actually 0, SYNC_MGR_E_S is 1, etc.
+ */
+static const char * const gaudi_sync_manager_names[] = {
+       "SYNC_MGR_E_N",
+       "SYNC_MGR_W_N",
+       "SYNC_MGR_E_S",
+       "SYNC_MGR_W_S",
+       NULL
+};
+
 struct ecc_info_extract_params {
        u64 block_address;
        u32 num_memories;
@@ -363,8 +469,6 @@ static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
                                        u32 size, u64 val);
 static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
                                        u32 num_regs, u32 val);
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
                                u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -375,7 +479,6 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb);
 static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
                                struct hl_gen_wait_properties *prop);
-
 static inline enum hl_collective_mode
 get_collective_mode(struct hl_device *hdev, u32 queue_id)
 {
@@ -403,7 +506,11 @@ static inline void set_default_power_values(struct hl_device *hdev)
 
        if (hdev->card_type == cpucp_card_type_pmc) {
                prop->max_power_default = MAX_POWER_DEFAULT_PMC;
-               prop->dc_power_default = DC_POWER_DEFAULT_PMC;
+
+               if (prop->fw_security_enabled)
+                       prop->dc_power_default = DC_POWER_DEFAULT_PMC_SEC;
+               else
+                       prop->dc_power_default = DC_POWER_DEFAULT_PMC;
        } else {
                prop->max_power_default = MAX_POWER_DEFAULT_PCI;
                prop->dc_power_default = DC_POWER_DEFAULT_PCI;
@@ -450,6 +557,7 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
                                                get_collective_mode(hdev, i);
        }
 
+       prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
        prop->collective_first_sob = 0;
        prop->collective_first_mon = 0;
@@ -551,6 +659,8 @@ static int gaudi_set_fixed_properties(struct hl_device *hdev)
        prop->hard_reset_done_by_fw = false;
        prop->gic_interrupts_enable = true;
 
+       prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
        return 0;
 }
 
@@ -723,14 +833,14 @@ pci_init:
                                        GAUDI_BOOT_FIT_REQ_TIMEOUT_USEC);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
-                       hdev->asic_funcs->hw_fini(hdev, true);
+                       hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (gaudi_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
+               hdev->asic_funcs->hw_fini(hdev, true, false);
        }
 
        return 0;
@@ -974,17 +1084,11 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;
-       u64 base_addr;
-       int rc;
+       int i;
 
-       base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-                       hw_sob_group->base_sob_id * 4;
-       rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
-                       base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
-       if (rc)
-               dev_err(hdev->dev,
-                       "failed resetting sob group - sob base %u, count %u",
-                       hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
+       for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
+               WREG32((mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       (hw_sob_group->base_sob_id * 4) + (i * 4)), 0);
 
        kref_init(&hw_sob_group->kref);
 }
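The SOB group reset now clears each register directly instead of scheduling a register-memset CB; a sketch of the address written for SOB k of the group, assuming 4 bytes per sync object as in the loop above:

	u32 sob_addr = mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
		       (hw_sob_group->base_sob_id + k) * 4;

	WREG32(sob_addr, 0);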
@@ -1121,6 +1225,20 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
        queue_id = job->hw_queue_id;
        prop = &hdev->kernel_queues[queue_id].sync_stream_prop;
 
+       if (job->cs->encaps_signals) {
+               /* use the encaps signal handle stored earlier in the flow
+                * and set the SOB information from the encaps
+                * signals handle
+                */
+               hl_hw_queue_encaps_sig_set_sob_info(hdev, job->cs, job,
+                                               cs_cmpl);
+
+               dev_dbg(hdev->dev, "collective wait: Sequence %llu found, sob_id: %u,  wait for sob_val: %u\n",
+                               job->cs->sequence,
+                               cs_cmpl->hw_sob->sob_id,
+                               cs_cmpl->sob_val);
+       }
+
        /* Add to wait CBs using slave monitor */
        wait_prop.data = (void *) job->user_cb;
        wait_prop.sob_base = cs_cmpl->hw_sob->sob_id;
@@ -1131,7 +1249,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
        wait_prop.size = cb_size;
 
        dev_dbg(hdev->dev,
-               "Generate slave wait CB, sob %d, val:0x%x, mon %d, q %d\n",
+               "Generate slave wait CB, sob %d, val:%x, mon %d, q %d\n",
                cs_cmpl->hw_sob->sob_id, cs_cmpl->sob_val,
                prop->collective_slave_mon_id, queue_id);
 
@@ -1145,7 +1263,7 @@ static void gaudi_collective_slave_init_job(struct hl_device *hdev,
                        prop->collective_sob_id, cb_size, false);
 }
 
-static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
+static int gaudi_collective_wait_init_cs(struct hl_cs *cs)
 {
        struct hl_cs_compl *signal_cs_cmpl =
                container_of(cs->signal_fence, struct hl_cs_compl, base_fence);
@@ -1163,9 +1281,37 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
        gaudi = hdev->asic_specific;
        cprop = &gaudi->collective_props;
 
-       /* copy the SOB id and value of the signal CS */
-       cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
-       cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       /* In the encaps signals case, the SOB info will be retrieved from
+        * the handle in gaudi_collective_slave_init_job.
+        */
+       if (!cs->encaps_signals) {
+               /* copy the SOB id and value of the signal CS */
+               cs_cmpl->hw_sob = signal_cs_cmpl->hw_sob;
+               cs_cmpl->sob_val = signal_cs_cmpl->sob_val;
+       }
+
+       /* Check again if the signal cs has already completed.
+        * If it has, don't send any wait cs since the hw_sob
+        * could already be in reset. If the signal is not completed,
+        * get a refcount on the hw_sob to prevent resetting the sob
+        * while the wait cs is not yet submitted.
+        * Note that this check is protected by two locks,
+        * the hw queue lock and the completion object lock,
+        * and the same completion object lock also protects
+        * the hw_sob reset handler function.
+        * The hw_queue lock prevents the hw_sob refcount value,
+        * changed by the signal/wait flows, from going out of sync.
+        */
+       spin_lock(&signal_cs_cmpl->lock);
+
+       if (completion_done(&cs->signal_fence->completion)) {
+               spin_unlock(&signal_cs_cmpl->lock);
+               return -EINVAL;
+       }
+       /* Increment kref since all slave queues are now waiting on it */
+       kref_get(&cs_cmpl->hw_sob->kref);
+
+       spin_unlock(&signal_cs_cmpl->lock);
 
        /* Calculate the stream from collective master queue (1st job) */
        job = list_first_entry(&cs->job_list, struct hl_cs_job, cs_node);
@@ -1210,21 +1356,17 @@ static void gaudi_collective_wait_init_cs(struct hl_cs *cs)
                                cprop->curr_sob_group_idx[stream], stream);
        }
 
-       /* Increment kref since all slave queues are now waiting on it */
-       kref_get(&cs_cmpl->hw_sob->kref);
-       /*
-        * Must put the signal fence after the SOB refcnt increment so
-        * the SOB refcnt won't turn 0 and reset the SOB before the
-        * wait CS was submitted.
-        */
        mb();
        hl_fence_put(cs->signal_fence);
        cs->signal_fence = NULL;
+
+       return 0;
 }
 
 static int gaudi_collective_wait_create_job(struct hl_device *hdev,
                struct hl_ctx *ctx, struct hl_cs *cs,
-               enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id)
+               enum hl_collective_mode mode, u32 queue_id, u32 wait_queue_id,
+               u32 encaps_signal_offset)
 {
        struct hw_queue_properties *hw_queue_prop;
        struct hl_cs_counters_atomic *cntr;
@@ -1284,6 +1426,13 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
        job->user_cb_size = cb_size;
        job->hw_queue_id = queue_id;
 
+       /* Since it's guaranteed to have only one chunk in the collective wait
+        * cs, we can use this chunk to set the encapsulated signal offset
+        * in the jobs.
+        */
+       if (cs->encaps_signals)
+               job->encaps_sig_wait_offset = encaps_signal_offset;
+
        /*
         * No need in parsing, user CB is the patched CB.
         * We call hl_cb_destroy() out of two reasons - we don't need
@@ -1312,8 +1461,9 @@ static int gaudi_collective_wait_create_job(struct hl_device *hdev,
 }
 
 static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
-               struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-               u32 collective_engine_id)
+               struct hl_ctx *ctx, struct hl_cs *cs,
+               u32 wait_queue_id, u32 collective_engine_id,
+               u32 encaps_signal_offset)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
        struct hw_queue_properties *hw_queue_prop;
@@ -1363,7 +1513,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
                if (i == 0) {
                        queue_id = wait_queue_id;
                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-                               HL_COLLECTIVE_MASTER, queue_id, wait_queue_id);
+                               HL_COLLECTIVE_MASTER, queue_id,
+                               wait_queue_id, encaps_signal_offset);
                } else {
                        if (nic_idx < NIC_NUMBER_OF_ENGINES) {
                                if (gaudi->hw_cap_initialized &
@@ -1383,7 +1534,8 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
                        }
 
                        rc = gaudi_collective_wait_create_job(hdev, ctx, cs,
-                               HL_COLLECTIVE_SLAVE, queue_id, wait_queue_id);
+                               HL_COLLECTIVE_SLAVE, queue_id,
+                               wait_queue_id, encaps_signal_offset);
                }
 
                if (rc)
@@ -1431,6 +1583,11 @@ static int gaudi_late_init(struct hl_device *hdev)
                return rc;
        }
 
+       /* Scrub both SRAM and DRAM */
+       rc = hdev->asic_funcs->scrub_device_mem(hdev, 0, 0);
+       if (rc)
+               goto disable_pci_access;
+
        rc = gaudi_fetch_psoc_frequency(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -1455,6 +1612,11 @@ static int gaudi_late_init(struct hl_device *hdev)
                goto disable_pci_access;
        }
 
+       /* We only support a single ASID for the user, so for the sake of optimization, just
+        * initialize the ASID one time during device initialization with the fixed value of 1
+        */
+       gaudi_mmu_prepare(hdev, 1);
+
        return 0;
 
 disable_pci_access:
@@ -1720,8 +1882,12 @@ static int gaudi_sw_init(struct hl_device *hdev)
        hdev->supports_sync_stream = true;
        hdev->supports_coresight = true;
        hdev->supports_staged_submission = true;
+       hdev->supports_wait_for_multi_cs = true;
 
-       gaudi_set_pci_memory_regions(hdev);
+       hdev->asic_funcs->set_pci_memory_regions(hdev);
+       hdev->stream_master_qid_arr =
+                               hdev->asic_funcs->get_stream_master_qid_arr();
+       hdev->stream_master_qid_arr_size = GAUDI_STREAM_MASTER_ARR_SIZE;
 
        return 0;
 
@@ -2523,7 +2689,7 @@ static void gaudi_init_golden_registers(struct hl_device *hdev)
                                tpc_id < TPC_NUMBER_OF_ENGINES;
                                tpc_id++, tpc_offset += TPC_CFG_OFFSET) {
                /* Mask all arithmetic interrupts from TPC */
-               WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFF);
+               WREG32(mmTPC0_CFG_TPC_INTR_MASK + tpc_offset, 0x8FFE);
                /* Set 16 cache lines */
                WREG32_FIELD(TPC0_CFG_MSS_CONFIG, tpc_offset,
                                ICACHE_FETCH_LINE_NUM, 2);
@@ -3670,7 +3836,7 @@ static void gaudi_disable_timestamp(struct hl_device *hdev)
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }
 
-static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        u32 wait_timeout_ms;
 
@@ -3682,6 +3848,9 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
        else
                wait_timeout_ms = GAUDI_RESET_WAIT_MSEC;
 
+       if (fw_reset)
+               goto skip_engines;
+
        gaudi_stop_nic_qmans(hdev);
        gaudi_stop_mme_qmans(hdev);
        gaudi_stop_tpc_qmans(hdev);
@@ -3707,6 +3876,7 @@ static void gaudi_halt_engines(struct hl_device *hdev, bool hard_reset)
 
        gaudi_disable_timestamp(hdev);
 
+skip_engines:
        gaudi_disable_msi(hdev);
 }
 
@@ -3739,6 +3909,9 @@ static int gaudi_mmu_init(struct hl_device *hdev)
        WREG32(mmSTLB_CACHE_INV_BASE_39_8, MMU_CACHE_MNG_ADDR >> 8);
        WREG32(mmSTLB_CACHE_INV_BASE_49_40, MMU_CACHE_MNG_ADDR >> 40);
 
+       /* mem cache invalidation */
+       WREG32(mmSTLB_MEM_CACHE_INVALIDATION, 1);
+
        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, 0);
 
        WREG32(mmMMU_UP_MMU_ENABLE, 1);
@@ -4071,7 +4244,7 @@ disable_queues:
        return rc;
 }
 
-static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        struct cpu_dyn_regs *dyn_regs =
                        &hdev->fw_loader.dynamic_loader.comm_desc.cpu_dyn_regs;
@@ -4092,6 +4265,14 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
                cpu_timeout_ms = GAUDI_CPU_RESET_WAIT_MSEC;
        }
 
+       if (fw_reset) {
+               dev_info(hdev->dev,
+                       "Firmware performs HARD reset, going to wait %dms\n",
+                       reset_timeout_ms);
+
+               goto skip_reset;
+       }
+
        driver_performs_reset = !!(!hdev->asic_prop.fw_security_enabled &&
                                        !hdev->asic_prop.hard_reset_done_by_fw);
 
@@ -4168,6 +4349,7 @@ static void gaudi_hw_fini(struct hl_device *hdev, bool hard_reset)
                        reset_timeout_ms);
        }
 
+skip_reset:
        /*
         * After hard reset, we can't poll the BTM_FSM register because the PSOC
         * itself is in reset. Need to wait until the reset is deasserted
@@ -4212,7 +4394,7 @@ static int gaudi_resume(struct hl_device *hdev)
        return gaudi_init_iatu(hdev);
 }
 
-static int gaudi_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int gaudi_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
        int rc;
@@ -4621,8 +4803,8 @@ static int gaudi_hbm_scrubbing(struct hl_device *hdev)
                                "Doing HBM scrubbing for 0x%09llx - 0x%09llx\n",
                                cur_addr, cur_addr + chunk_size);
 
-                       WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0);
-                       WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0);
+                       WREG32(mmDMA0_CORE_SRC_BASE_LO + dma_offset, 0xdeadbeaf);
+                       WREG32(mmDMA0_CORE_SRC_BASE_HI + dma_offset, 0xdeadbeaf);
                        WREG32(mmDMA0_CORE_DST_BASE_LO + dma_offset,
                                                lower_32_bits(cur_addr));
                        WREG32(mmDMA0_CORE_DST_BASE_HI + dma_offset,
@@ -5796,78 +5978,6 @@ release_cb:
        return rc;
 }
 
-static int gaudi_schedule_register_memset(struct hl_device *hdev,
-               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
-{
-       struct hl_ctx *ctx;
-       struct hl_pending_cb *pending_cb;
-       struct packet_msg_long *pkt;
-       u32 cb_size, ctl;
-       struct hl_cb *cb;
-       int i, rc;
-
-       mutex_lock(&hdev->fpriv_list_lock);
-       ctx = hdev->compute_ctx;
-
-       /* If no compute context available or context is going down
-        * memset registers directly
-        */
-       if (!ctx || kref_read(&ctx->refcount) == 0) {
-               rc = gaudi_memset_registers(hdev, reg_base, num_regs, val);
-               mutex_unlock(&hdev->fpriv_list_lock);
-               return rc;
-       }
-
-       mutex_unlock(&hdev->fpriv_list_lock);
-
-       cb_size = (sizeof(*pkt) * num_regs) +
-                       sizeof(struct packet_msg_prot) * 2;
-
-       if (cb_size > SZ_2M) {
-               dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
-               return -ENOMEM;
-       }
-
-       pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
-       if (!pending_cb)
-               return -ENOMEM;
-
-       cb = hl_cb_kernel_create(hdev, cb_size, false);
-       if (!cb) {
-               kfree(pending_cb);
-               return -EFAULT;
-       }
-
-       pkt = cb->kernel_address;
-
-       ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
-
-       for (i = 0; i < num_regs ; i++, pkt++) {
-               pkt->ctl = cpu_to_le32(ctl);
-               pkt->value = cpu_to_le32(val);
-               pkt->addr = cpu_to_le64(reg_base + (i * 4));
-       }
-
-       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
-
-       pending_cb->cb = cb;
-       pending_cb->cb_size = cb_size;
-       /* The queue ID MUST be an external queue ID. Otherwise, we will
-        * have undefined behavior
-        */
-       pending_cb->hw_queue_id = hw_queue_id;
-
-       spin_lock(&ctx->pending_cb_lock);
-       list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
-       spin_unlock(&ctx->pending_cb_lock);
-
-       return 0;
-}
-
 static int gaudi_restore_sm_registers(struct hl_device *hdev)
 {
        u64 base_addr;
@@ -6013,7 +6123,7 @@ static int gaudi_restore_user_registers(struct hl_device *hdev)
 
 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
 {
-       return gaudi_restore_user_registers(hdev);
+       return 0;
 }
 
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -6723,6 +6833,9 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
                                asid);
        }
 
+       gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER, asid);
+       gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER, asid);
+
        hdev->asic_funcs->set_clock_gating(hdev);
 
        mutex_unlock(&gaudi->clk_gate_mutex);
@@ -6772,7 +6885,8 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
 
        dma_offset = gaudi_dma_assignment[GAUDI_PCI_DMA_1] * DMA_CORE_OFFSET;
 
-       WREG32_OR(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_VAL_SHIFT));
+       WREG32(mmDMA0_CORE_PROT + dma_offset,
+                       BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT) | BIT(DMA0_CORE_PROT_VAL_SHIFT));
 
        rc = hl_hw_queue_send_cb_no_cmpl(hdev, GAUDI_QUEUE_ID_DMA_0_0,
                                        job->job_cb_size, cb->bus_address);
@@ -6793,8 +6907,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
        }
 
 free_fence_ptr:
-       WREG32_AND(mmDMA0_CORE_PROT + dma_offset,
-                       ~BIT(DMA0_CORE_PROT_VAL_SHIFT));
+       WREG32(mmDMA0_CORE_PROT + dma_offset, BIT(DMA0_CORE_PROT_ERR_VAL_SHIFT));
 
        hdev->asic_funcs->asic_dma_pool_free(hdev, (void *) fence_ptr,
                                        fence_dma_addr);
@@ -7168,7 +7281,7 @@ static void gaudi_print_sw_config_stream_data(struct hl_device *hdev, u32 stream
 
        cq_ptr = (((u64) RREG32(cq_ptr_hi)) << 32) | RREG32(cq_ptr_lo);
        size = RREG32(cq_tsize);
-       dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %x\n",
+       dev_info(hdev->dev, "stop on err: stream: %u, addr: %#llx, size: %u\n",
                                                        stream, cq_ptr, size);
 }
 
@@ -7224,7 +7337,7 @@ static void gaudi_print_last_pqes_on_err(struct hl_device *hdev, u32 qid_base,
 
                addr = le64_to_cpu(bd->ptr);
 
-               dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %x\n",
+               dev_info(hdev->dev, "stop on err PQE(stream %u): ci: %u, addr: %#llx, size: %u\n",
                                                        stream, ci, addr, len);
 
                /* get previous ci, wrap if needed */
@@ -7326,24 +7439,30 @@ static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
 {
        u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
 
+       /* Flip the bits as the enum is ordered in the opposite way */
+       index = (index ^ 0x3) & 0x3;
+
        switch (sei_data->sei_cause) {
        case SM_SEI_SO_OVERFLOW:
-               dev_err(hdev->dev,
-                       "SM %u SEI Error: SO %u overflow/underflow",
-                       index, le32_to_cpu(sei_data->sei_log));
+               dev_err_ratelimited(hdev->dev,
+                       "%s SEI Error: SOB Group %u overflow/underflow",
+                       gaudi_sync_manager_names[index],
+                       le32_to_cpu(sei_data->sei_log));
                break;
        case SM_SEI_LBW_4B_UNALIGNED:
-               dev_err(hdev->dev,
-                       "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
-                       index, le32_to_cpu(sei_data->sei_log));
+               dev_err_ratelimited(hdev->dev,
+                       "%s SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
+                       gaudi_sync_manager_names[index],
+                       le32_to_cpu(sei_data->sei_log));
                break;
        case SM_SEI_AXI_RESPONSE_ERR:
-               dev_err(hdev->dev,
-                       "SM %u SEI Error: AXI ID %u response error",
-                       index, le32_to_cpu(sei_data->sei_log));
+               dev_err_ratelimited(hdev->dev,
+                       "%s SEI Error: AXI ID %u response error",
+                       gaudi_sync_manager_names[index],
+                       le32_to_cpu(sei_data->sei_log));
                break;
        default:
-               dev_err(hdev->dev, "Unknown SM SEI cause %u",
+               dev_err_ratelimited(hdev->dev, "Unknown SM SEI cause %u",
                                le32_to_cpu(sei_data->sei_log));
                break;
        }
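The XOR flip above remaps the SEI event index onto the gaudi_sync_manager_names[] ordering declared earlier, assuming the event enum follows the hardware ordering noted in that array's comment; an illustrative sketch of the mapping:

	/*
	 * (index ^ 0x3) & 0x3:
	 *   0 -> 3 "SYNC_MGR_W_S"    1 -> 2 "SYNC_MGR_E_S"
	 *   2 -> 1 "SYNC_MGR_W_N"    3 -> 0 "SYNC_MGR_E_N"
	 */
	u32 name_idx = (index ^ 0x3) & 0x3;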
@@ -7358,6 +7477,11 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
        bool extract_info_from_fw;
        int rc;
 
+       if (hdev->asic_prop.fw_security_enabled) {
+               extract_info_from_fw = true;
+               goto extract_ecc_info;
+       }
+
        switch (event_type) {
        case GAUDI_EVENT_PCIE_CORE_SERR ... GAUDI_EVENT_PCIE_PHY_DERR:
        case GAUDI_EVENT_DMA0_SERR_ECC ... GAUDI_EVENT_MMU_DERR:
@@ -7430,6 +7554,7 @@ static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
                return;
        }
 
+extract_ecc_info:
        if (extract_info_from_fw) {
                ecc_address = le64_to_cpu(ecc_data->ecc_address);
                ecc_syndrom = le64_to_cpu(ecc_data->ecc_syndrom);
@@ -7806,8 +7931,15 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
        u32 ctl = le32_to_cpu(eq_entry->hdr.ctl);
        u16 event_type = ((ctl & EQ_CTL_EVENT_TYPE_MASK)
                        >> EQ_CTL_EVENT_TYPE_SHIFT);
-       u8 cause;
        bool reset_required;
+       u8 cause;
+       int rc;
+
+       if (event_type >= GAUDI_EVENT_SIZE) {
+               dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+                               event_type, GAUDI_EVENT_SIZE - 1);
+               return;
+       }
 
        gaudi->events_stat[event_type]++;
        gaudi->events_stat_aggregate[event_type]++;
@@ -7880,10 +8012,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                                        tpc_dec_event_to_tpc_id(event_type),
                                        "AXI_SLV_DEC_Error");
                if (reset_required) {
-                       dev_err(hdev->dev, "hard reset required due to %s\n",
+                       dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
 
-                       goto reset_device;
+                       hl_device_reset(hdev, 0);
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
                }
@@ -7902,10 +8034,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                                        tpc_krn_event_to_tpc_id(event_type),
                                        "KRN_ERR");
                if (reset_required) {
-                       dev_err(hdev->dev, "hard reset required due to %s\n",
+                       dev_err(hdev->dev, "reset required due to %s\n",
                                gaudi_irq_map_table[event_type].name);
 
-                       goto reset_device;
+                       hl_device_reset(hdev, 0);
                } else {
                        hl_fw_unmask_irq(hdev, event_type);
                }
@@ -7993,6 +8125,10 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                gaudi_print_irq_info(hdev, event_type, false);
                gaudi_print_sm_sei_info(hdev, event_type,
                                        &eq_entry->sm_sei_data);
+               rc = hl_state_dump(hdev);
+               if (rc)
+                       dev_err(hdev->dev,
+                               "Error during system state dump %d\n", rc);
                hl_fw_unmask_irq(hdev, event_type);
                break;
 
@@ -8031,7 +8167,9 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
        return;
 
 reset_device:
-       if (hdev->hard_reset_on_fw_events)
+       if (hdev->asic_prop.fw_security_enabled)
+               hl_device_reset(hdev, HL_RESET_HARD | HL_RESET_FW);
+       else if (hdev->hard_reset_on_fw_events)
                hl_device_reset(hdev, HL_RESET_HARD);
        else
                hl_fw_unmask_irq(hdev, event_type);
@@ -8563,11 +8701,20 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+       int rc;
+
        if (ctx->asid == HL_KERNEL_ASID_ID)
                return 0;
 
-       gaudi_mmu_prepare(ctx->hdev, ctx->asid);
-       return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+       rc = gaudi_internal_cb_pool_init(ctx->hdev, ctx);
+       if (rc)
+               return rc;
+
+       rc = gaudi_restore_user_registers(ctx->hdev);
+       if (rc)
+               gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
+
+       return rc;
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
@@ -8596,6 +8743,11 @@ static u32 gaudi_get_wait_cb_size(struct hl_device *hdev)
                        sizeof(struct packet_msg_prot) * 2;
 }
 
+static u32 gaudi_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+       return mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + (sob_id * 4);
+}
+
 static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
                                u32 size, bool eb)
 {
@@ -8902,16 +9054,12 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 {
        struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
-       int rc;
 
        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
                hw_sob->sob_id);
 
-       rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
-                       CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-                       hw_sob->sob_id * 4, 1, 0);
-       if (rc)
-               dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
+       WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       hw_sob->sob_id * 4, 0);
 
        kref_init(&hw_sob->kref);
 }
@@ -8977,6 +9125,280 @@ static int gaudi_map_pll_idx_to_fw_idx(u32 pll_idx)
        }
 }
 
+static int gaudi_add_sync_to_engine_map_entry(
+       struct hl_sync_to_engine_map *map, u32 reg_value,
+       enum hl_sync_engine_type engine_type, u32 engine_id)
+{
+       struct hl_sync_to_engine_map_entry *entry;
+
+       /* The register value holds a partial address of the sync object and
+        * is used as its unique identifier. For this we need to subtract
+        * the CFG base from the value.
+        */
+       if (reg_value == 0 || reg_value == 0xffffffff)
+               return 0;
+       reg_value -= (u32)CFG_BASE;
+
+       /* create a new hash entry */
+       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+       entry->engine_type = engine_type;
+       entry->engine_id = engine_id;
+       entry->sync_id = reg_value;
+       hash_add(map->tb, &entry->node, reg_value);
+
+       return 0;
+}
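/* Illustrative sketch, not part of this patch: the reverse lookup for the
 * map built above. Entries are hashed by their CFG-relative sync-object
 * offset, so a consumer can recover the owning engine with the standard
 * <linux/hashtable.h> iterator. Assumes the same hl_sync_to_engine_map
 * layout used by hash_add() above (bucket array 'tb', hlist node 'node').
 */
static struct hl_sync_to_engine_map_entry *
find_engine_for_sync(struct hl_sync_to_engine_map *map, u32 sync_id)
{
	struct hl_sync_to_engine_map_entry *entry;

	hash_for_each_possible(map->tb, entry, node, sync_id)
		if (entry->sync_id == sync_id)
			return entry;

	return NULL;
}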
+
+static int gaudi_gen_sync_to_engine_map(struct hl_device *hdev,
+                               struct hl_sync_to_engine_map *map)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       struct gaudi_device *gaudi = hdev->asic_specific;
+       int i, j, rc;
+       u32 reg_value;
+
+       /* Iterate over TPC engines */
+       for (i = 0; i < sds->props[SP_NUM_OF_TPC_ENGINES]; ++i) {
+               /* TPC registers must be accessed with clock gating disabled */
+               mutex_lock(&gaudi->clk_gate_mutex);
+               hdev->asic_funcs->disable_clock_gating(hdev);
+
+               reg_value = RREG32(sds->props[SP_TPC0_CFG_SO] +
+                                       sds->props[SP_NEXT_TPC] * i);
+
+               /* We can reenable clock_gating */
+               hdev->asic_funcs->set_clock_gating(hdev);
+               mutex_unlock(&gaudi->clk_gate_mutex);
+
+               rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+                                                       ENGINE_TPC, i);
+               if (rc)
+                       goto free_sync_to_engine_map;
+       }
+
+       /* Iterate over MME engines */
+       for (i = 0; i < sds->props[SP_NUM_OF_MME_ENGINES]; ++i) {
+               for (j = 0; j < sds->props[SP_SUB_MME_ENG_NUM]; ++j) {
+                       /* MME registers must be accessed with clock gating
+                        * disabled
+                        */
+                       mutex_lock(&gaudi->clk_gate_mutex);
+                       hdev->asic_funcs->disable_clock_gating(hdev);
+
+                       reg_value = RREG32(sds->props[SP_MME_CFG_SO] +
+                                               sds->props[SP_NEXT_MME] * i +
+                                               j * sizeof(u32));
+
+                       /* We can reenable clock_gating */
+                       hdev->asic_funcs->set_clock_gating(hdev);
+                       mutex_unlock(&gaudi->clk_gate_mutex);
+
+                       rc = gaudi_add_sync_to_engine_map_entry(
+                               map, reg_value, ENGINE_MME,
+                               i * sds->props[SP_SUB_MME_ENG_NUM] + j);
+                       if (rc)
+                               goto free_sync_to_engine_map;
+               }
+       }
+
+       /* Iterate over DMA engines */
+       for (i = 0; i < sds->props[SP_NUM_OF_DMA_ENGINES]; ++i) {
+               reg_value = RREG32(sds->props[SP_DMA_CFG_SO] +
+                                       sds->props[SP_DMA_QUEUES_OFFSET] * i);
+               rc = gaudi_add_sync_to_engine_map_entry(map, reg_value,
+                                                       ENGINE_DMA, i);
+               if (rc)
+                       goto free_sync_to_engine_map;
+       }
+
+       return 0;
+
+free_sync_to_engine_map:
+       hl_state_dump_free_sync_to_engine_map(map);
+
+       return rc;
+}
+
+static int gaudi_monitor_valid(struct hl_mon_state_dump *mon)
+{
+       return FIELD_GET(
+               SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK,
+               mon->status);
+}
+
+static void gaudi_fill_sobs_from_mon(char *sobs, struct hl_mon_state_dump *mon)
+{
+       const size_t max_write = 10;
+       u32 gid, mask, sob;
+       int i, offset;
+
+       /* Sync object IDs are derived as follows: each cleared bit i in the
+        * mask denotes sync object (8 * group_id + i).
+        */
+       gid = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+                       mon->arm_data);
+       mask = FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+                       mon->arm_data);
+
+       for (i = 0, offset = 0; mask && offset < MONITOR_SOB_STRING_SIZE -
+               max_write; mask >>= 1, i++) {
+               if (!(mask & 1)) {
+                       sob = gid * MONITOR_MAX_SOBS + i;
+
+                       if (offset > 0)
+                               offset += snprintf(sobs + offset, max_write,
+                                                       ", ");
+
+                       offset += snprintf(sobs + offset, max_write, "%u", sob);
+               }
+       }
+}
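/* Illustrative sketch, assuming the MON_ARM_0 field layout from
 * gaudi_masks.h (SID in bits [7:0], MASK in bits [15:8]) and
 * MONITOR_MAX_SOBS == 8: each cleared mask bit i selects sync object
 * (group_id * 8 + i), which is exactly what the loop above formats.
 */
static void list_monitored_sobs(u32 arm_data)
{
	u32 gid  = arm_data & 0xFF;		/* MON_ARM_0 SID field  */
	u32 mask = (arm_data >> 8) & 0xFF;	/* MON_ARM_0 MASK field */
	u32 i;

	for (i = 0; i < 8; i++)
		if (!(mask & BIT(i)))
			pr_debug("monitoring SOB %u\n", gid * 8 + i);
}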
+
+static int gaudi_print_single_monitor(char **buf, size_t *size, size_t *offset,
+                               struct hl_device *hdev,
+                               struct hl_mon_state_dump *mon)
+{
+       const char *name;
+       char scratch_buf1[BIN_REG_STRING_SIZE],
+               scratch_buf2[BIN_REG_STRING_SIZE];
+       char monitored_sobs[MONITOR_SOB_STRING_SIZE] = {0};
+
+       name = hl_state_dump_get_monitor_name(hdev, mon);
+       if (!name)
+               name = "";
+
+       gaudi_fill_sobs_from_mon(monitored_sobs, mon);
+
+       return hl_snprintf_resize(
+               buf, size, offset,
+               "Mon id: %u%s, wait for group id: %u mask %s to reach val: %u and write %u to address 0x%llx. Pending: %s. Means sync objects [%s] are being monitored.",
+               mon->id, name,
+               FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK,
+                               mon->arm_data),
+               hl_format_as_binary(
+                       scratch_buf1, sizeof(scratch_buf1),
+                       FIELD_GET(
+                               SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK,
+                               mon->arm_data)),
+               FIELD_GET(SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK,
+                               mon->arm_data),
+               mon->wr_data,
+               (((u64)mon->wr_addr_high) << 32) | mon->wr_addr_low,
+               hl_format_as_binary(
+                       scratch_buf2, sizeof(scratch_buf2),
+                       FIELD_GET(
+                               SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK,
+                               mon->status)),
+               monitored_sobs);
+}
+
+
+static int gaudi_print_fences_single_engine(
+       struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+       enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+       size_t *size, size_t *offset)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       int rc = -ENOMEM, i;
+       u32 *statuses, *fences;
+
+       statuses = kcalloc(sds->props[SP_ENGINE_NUM_OF_QUEUES],
+                       sizeof(*statuses), GFP_KERNEL);
+       if (!statuses)
+               goto out;
+
+       fences = kcalloc(sds->props[SP_ENGINE_NUM_OF_FENCES] *
+                               sds->props[SP_ENGINE_NUM_OF_QUEUES],
+                        sizeof(*fences), GFP_KERNEL);
+       if (!fences)
+               goto free_status;
+
+       for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES]; ++i)
+               statuses[i] = RREG32(status_base_offset + i * sizeof(u32));
+
+       for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_FENCES] *
+                               sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i)
+               fences[i] = RREG32(base_offset + i * sizeof(u32));
+
+       /* The actual print */
+       for (i = 0; i < sds->props[SP_ENGINE_NUM_OF_QUEUES]; ++i) {
+               u32 fence_id;
+               u64 fence_cnt, fence_rdata;
+               const char *engine_name;
+
+               if (!FIELD_GET(TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK,
+                       statuses[i]))
+                       continue;
+
+               fence_id =
+                       FIELD_GET(TPC0_QM_CP_STS_0_FENCE_ID_MASK, statuses[i]);
+               fence_cnt = base_offset + CFG_BASE +
+                       sizeof(u32) *
+                       (i + fence_id * sds->props[SP_ENGINE_NUM_OF_QUEUES]);
+               fence_rdata = fence_cnt - sds->props[SP_FENCE0_CNT_OFFSET] +
+                               sds->props[SP_FENCE0_RDATA_OFFSET];
+               engine_name = hl_sync_engine_to_string(engine_type);
+
+               rc = hl_snprintf_resize(
+                       buf, size, offset,
+                       "%s%u, stream %u: fence id %u cnt = 0x%llx (%s%u_QM.CP_FENCE%u_CNT_%u) rdata = 0x%llx (%s%u_QM.CP_FENCE%u_RDATA_%u) value = %u, cp_status = %u\n",
+                       engine_name, engine_id,
+                       i, fence_id,
+                       fence_cnt, engine_name, engine_id, fence_id, i,
+                       fence_rdata, engine_name, engine_id, fence_id, i,
+                       fences[fence_id],
+                       statuses[i]);
+               if (rc)
+                       goto free_fences;
+       }
+
+       rc = 0;
+
+free_fences:
+       kfree(fences);
+free_status:
+       kfree(statuses);
+out:
+       return rc;
+}
+
+
+static struct hl_state_dump_specs_funcs gaudi_state_dump_funcs = {
+       .monitor_valid = gaudi_monitor_valid,
+       .print_single_monitor = gaudi_print_single_monitor,
+       .gen_sync_to_engine_map = gaudi_gen_sync_to_engine_map,
+       .print_fences_single_engine = gaudi_print_fences_single_engine,
+};
+
+static void gaudi_state_dump_init(struct hl_device *hdev)
+{
+       struct hl_state_dump_specs *sds = &hdev->state_dump_specs;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(gaudi_so_id_to_str); ++i)
+               hash_add(sds->so_id_to_str_tb,
+                       &gaudi_so_id_to_str[i].node,
+                       gaudi_so_id_to_str[i].id);
+
+       for (i = 0; i < ARRAY_SIZE(gaudi_monitor_id_to_str); ++i)
+               hash_add(sds->monitor_id_to_str_tb,
+                       &gaudi_monitor_id_to_str[i].node,
+                       gaudi_monitor_id_to_str[i].id);
+
+       sds->props = gaudi_state_dump_specs_props;
+
+       sds->sync_namager_names = gaudi_sync_manager_names;
+
+       sds->funcs = gaudi_state_dump_funcs;
+}
+
+static u32 *gaudi_get_stream_master_qid_arr(void)
+{
+       return gaudi_stream_master;
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
        .early_init = gaudi_early_init,
        .early_fini = gaudi_early_fini,
@@ -8989,7 +9411,7 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .halt_engines = gaudi_halt_engines,
        .suspend = gaudi_suspend,
        .resume = gaudi_resume,
-       .cb_mmap = gaudi_cb_mmap,
+       .mmap = gaudi_mmap,
        .ring_doorbell = gaudi_ring_doorbell,
        .pqe_write = gaudi_pqe_write,
        .asic_dma_alloc_coherent = gaudi_dma_alloc_coherent,
@@ -9062,7 +9484,11 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .enable_events_from_fw = gaudi_enable_events_from_fw,
        .map_pll_idx_to_fw_idx = gaudi_map_pll_idx_to_fw_idx,
        .init_firmware_loader = gaudi_init_firmware_loader,
-       .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm
+       .init_cpu_scrambler_dram = gaudi_init_scrambler_hbm,
+       .state_dump_init = gaudi_state_dump_init,
+       .get_sob_addr = gaudi_get_sob_addr,
+       .set_pci_memory_regions = gaudi_set_pci_memory_regions,
+       .get_stream_master_qid_arr = gaudi_get_stream_master_qid_arr
 };
 
 /**
index 957bf37..bbbf1c3 100644 (file)
@@ -36,6 +36,8 @@
 #define NUMBER_OF_INTERRUPTS           (NUMBER_OF_CMPLT_QUEUES + \
                                                NUMBER_OF_CPU_HW_QUEUES)
 
+#define GAUDI_STREAM_MASTER_ARR_SIZE   8
+
 #if (NUMBER_OF_INTERRUPTS > GAUDI_MSI_ENTRIES)
 #error "Number of MSI interrupts must be smaller or equal to GAUDI_MSI_ENTRIES"
 #endif
@@ -50,6 +52,8 @@
 #define DC_POWER_DEFAULT_PCI           60000           /* 60W */
 #define DC_POWER_DEFAULT_PMC           60000           /* 60W */
 
+#define DC_POWER_DEFAULT_PMC_SEC       97000           /* 97W */
+
 #define GAUDI_CPU_TIMEOUT_USEC         30000000        /* 30s */
 
 #define TPC_ENABLED_MASK               0xFF
@@ -62,7 +66,7 @@
 
 #define DMA_MAX_TRANSFER_SIZE          U32_MAX
 
-#define GAUDI_DEFAULT_CARD_NAME                "HL2000"
+#define GAUDI_DEFAULT_CARD_NAME                "HL205"
 
 #define GAUDI_MAX_PENDING_CS           SZ_16K
 
        (((mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511 - \
        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0) + 4) >> 2)
 
+#define MONITOR_MAX_SOBS       8
 
 /* DRAM Memory Map */
 
 #define HW_CAP_TPC_MASK                GENMASK(31, 24)
 #define HW_CAP_TPC_SHIFT       24
 
+#define NEXT_SYNC_OBJ_ADDR_INTERVAL \
+       (mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 - \
+        mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0)
+#define NUM_OF_MME_ENGINES                     2
+#define NUM_OF_MME_SUB_ENGINES         2
+#define NUM_OF_TPC_ENGINES                     8
+#define NUM_OF_DMA_ENGINES                     8
+#define NUM_OF_QUEUES                          5
+#define NUM_OF_STREAMS                         4
+#define NUM_OF_FENCES                          4
+
+
 #define GAUDI_CPU_PCI_MSB_ADDR(addr)   (((addr) & GENMASK_ULL(49, 39)) >> 39)
 #define GAUDI_PCI_TO_CPU_ADDR(addr)                    \
        do {                                            \
index c2a27ed..5349c1b 100644 (file)
@@ -622,11 +622,6 @@ static int gaudi_config_etr(struct hl_device *hdev,
                        return -EINVAL;
                }
 
-               gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_ARUSER,
-                                               hdev->compute_ctx->asid);
-               gaudi_mmu_prepare_reg(hdev, mmPSOC_GLOBAL_CONF_TRACE_AWUSER,
-                                               hdev->compute_ctx->asid);
-
                msb = upper_32_bits(input->buffer_address) >> 8;
                msb &= PSOC_GLOBAL_CONF_TRACE_ADDR_MSB_MASK;
                WREG32(mmPSOC_GLOBAL_CONF_TRACE_ADDR, msb);
index 0d3240f..cb265c0 100644 (file)
@@ -9559,6 +9559,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC0_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC0_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC0_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC0_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC0_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC0_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC0_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10013,6 +10014,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC1_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC1_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC1_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC1_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC1_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC1_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC1_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10466,6 +10468,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC2_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC2_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC2_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC2_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC2_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC2_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC2_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -10919,6 +10922,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC3_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC3_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC3_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC3_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC3_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC3_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC3_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11372,6 +11376,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC4_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC4_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC4_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC4_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC4_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC4_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC4_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -11825,6 +11830,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC5_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC5_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC5_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC5_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC5_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC5_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC5_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12280,6 +12286,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC6_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC6_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC6_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC6_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC6_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC6_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC6_CFG_MSS_CONFIG & 0x7F) >> 2);
@@ -12735,6 +12742,7 @@ static void gaudi_init_tpc_protection_bits(struct hl_device *hdev)
        mask |= 1U << ((mmTPC7_CFG_CFG_BASE_ADDRESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC7_CFG_CFG_SUBTRACT_VALUE & 0x7F) >> 2);
        mask |= 1U << ((mmTPC7_CFG_TPC_STALL & 0x7F) >> 2);
+       mask |= 1U << ((mmTPC7_CFG_ICACHE_BASE_ADDERESS_HIGH & 0x7F) >> 2);
        mask |= 1U << ((mmTPC7_CFG_RD_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC7_CFG_WR_RATE_LIMIT & 0x7F) >> 2);
        mask |= 1U << ((mmTPC7_CFG_MSS_CONFIG & 0x7F) >> 2);
index 755e08c..031c184 100644 (file)
@@ -350,6 +350,8 @@ static u32 goya_all_events[] = {
        GOYA_ASYNC_EVENT_ID_FIX_THERMAL_ENV_E
 };
 
+static s64 goya_state_dump_specs_props[SP_MAX] = {0};
+
 static int goya_mmu_clear_pgt_range(struct hl_device *hdev);
 static int goya_mmu_set_dram_default_page(struct hl_device *hdev);
 static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev);
@@ -387,6 +389,7 @@ int goya_set_fixed_properties(struct hl_device *hdev)
                prop->hw_queues_props[i].cb_alloc_flags = CB_ALLOC_USER;
        }
 
+       prop->device_dma_offset_for_host_access = HOST_PHYS_BASE;
        prop->completion_queues_count = NUMBER_OF_CMPLT_QUEUES;
 
        prop->dram_base_address = DRAM_PHYS_BASE;
@@ -466,6 +469,8 @@ int goya_set_fixed_properties(struct hl_device *hdev)
        prop->hard_reset_done_by_fw = false;
        prop->gic_interrupts_enable = true;
 
+       prop->server_type = HL_SERVER_TYPE_UNKNOWN;
+
        return 0;
 }
 
@@ -649,14 +654,14 @@ pci_init:
                                        GOYA_BOOT_FIT_REQ_TIMEOUT_USEC);
        if (rc) {
                if (hdev->reset_on_preboot_fail)
-                       hdev->asic_funcs->hw_fini(hdev, true);
+                       hdev->asic_funcs->hw_fini(hdev, true, false);
                goto pci_fini;
        }
 
        if (goya_get_hw_state(hdev) == HL_DEVICE_HW_STATE_DIRTY) {
                dev_info(hdev->dev,
                        "H/W state is dirty, must reset before initializing\n");
-               hdev->asic_funcs->hw_fini(hdev, true);
+               hdev->asic_funcs->hw_fini(hdev, true, false);
        }
 
        if (!hdev->pldm) {
@@ -955,8 +960,9 @@ static int goya_sw_init(struct hl_device *hdev)
        hdev->supports_coresight = true;
        hdev->supports_soft_reset = true;
        hdev->allow_external_soft_reset = true;
+       hdev->supports_wait_for_multi_cs = false;
 
-       goya_set_pci_memory_regions(hdev);
+       hdev->asic_funcs->set_pci_memory_regions(hdev);
 
        return 0;
 
@@ -2374,7 +2380,7 @@ static void goya_disable_timestamp(struct hl_device *hdev)
        WREG32(mmPSOC_TIMESTAMP_BASE - CFG_BASE, 0);
 }
 
-static void goya_halt_engines(struct hl_device *hdev, bool hard_reset)
+static void goya_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        u32 wait_timeout_ms;
 
@@ -2493,6 +2499,7 @@ static void goya_init_firmware_loader(struct hl_device *hdev)
        struct fw_load_mgr *fw_loader = &hdev->fw_loader;
 
        /* fill common fields */
+       fw_loader->linux_loaded = false;
        fw_loader->boot_fit_img.image_name = GOYA_BOOT_FIT_FILE;
        fw_loader->linux_img.image_name = GOYA_LINUX_FW_FILE;
        fw_loader->cpu_timeout = GOYA_CPU_TIMEOUT_USEC;
@@ -2696,14 +2703,7 @@ disable_queues:
        return rc;
 }
 
-/*
- * goya_hw_fini - Goya hardware tear-down code
- *
- * @hdev: pointer to hl_device structure
- * @hard_reset: should we do hard reset to all engines or just reset the
- *              compute/dma engines
- */
-static void goya_hw_fini(struct hl_device *hdev, bool hard_reset)
+static void goya_hw_fini(struct hl_device *hdev, bool hard_reset, bool fw_reset)
 {
        struct goya_device *goya = hdev->asic_specific;
        u32 reset_timeout_ms, cpu_timeout_ms, status;
@@ -2796,7 +2796,7 @@ int goya_resume(struct hl_device *hdev)
        return goya_init_iatu(hdev);
 }
 
-static int goya_cb_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+static int goya_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
                        void *cpu_addr, dma_addr_t dma_addr, size_t size)
 {
        int rc;
@@ -4797,6 +4797,12 @@ void goya_handle_eqe(struct hl_device *hdev, struct hl_eq_entry *eq_entry)
                                >> EQ_CTL_EVENT_TYPE_SHIFT);
        struct goya_device *goya = hdev->asic_specific;
 
+       if (event_type >= GOYA_ASYNC_EVENT_ID_SIZE) {
+               dev_err(hdev->dev, "Event type %u exceeds maximum of %u",
+                               event_type, GOYA_ASYNC_EVENT_ID_SIZE - 1);
+               return;
+       }
+
        goya->events_stat[event_type]++;
        goya->events_stat_aggregate[event_type]++;
 
@@ -5475,14 +5481,14 @@ u64 goya_get_device_time(struct hl_device *hdev)
        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
-static void goya_collective_wait_init_cs(struct hl_cs *cs)
+static int goya_collective_wait_init_cs(struct hl_cs *cs)
 {
-
+       return 0;
 }
 
 static int goya_collective_wait_create_jobs(struct hl_device *hdev,
                struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
-               u32 collective_engine_id)
+               u32 collective_engine_id, u32 encaps_signal_offset)
 {
        return -EINVAL;
 }
@@ -5524,6 +5530,62 @@ static int goya_map_pll_idx_to_fw_idx(u32 pll_idx)
        }
 }
 
+static int goya_gen_sync_to_engine_map(struct hl_device *hdev,
+                               struct hl_sync_to_engine_map *map)
+{
+       /* Not implemented */
+       return 0;
+}
+
+static int goya_monitor_valid(struct hl_mon_state_dump *mon)
+{
+       /* Not implemented */
+       return 0;
+}
+
+static int goya_print_single_monitor(char **buf, size_t *size, size_t *offset,
+                               struct hl_device *hdev,
+                               struct hl_mon_state_dump *mon)
+{
+       /* Not implemented */
+       return 0;
+}
+
+
+static int goya_print_fences_single_engine(
+       struct hl_device *hdev, u64 base_offset, u64 status_base_offset,
+       enum hl_sync_engine_type engine_type, u32 engine_id, char **buf,
+       size_t *size, size_t *offset)
+{
+       /* Not implemented */
+       return 0;
+}
+
+
+static struct hl_state_dump_specs_funcs goya_state_dump_funcs = {
+       .monitor_valid = goya_monitor_valid,
+       .print_single_monitor = goya_print_single_monitor,
+       .gen_sync_to_engine_map = goya_gen_sync_to_engine_map,
+       .print_fences_single_engine = goya_print_fences_single_engine,
+};
+
+static void goya_state_dump_init(struct hl_device *hdev)
+{
+       /* Not implemented */
+       hdev->state_dump_specs.props = goya_state_dump_specs_props;
+       hdev->state_dump_specs.funcs = goya_state_dump_funcs;
+}
+
+static u32 goya_get_sob_addr(struct hl_device *hdev, u32 sob_id)
+{
+       return 0;
+}
+
+static u32 *goya_get_stream_master_qid_arr(void)
+{
+       return NULL;
+}
+
 static const struct hl_asic_funcs goya_funcs = {
        .early_init = goya_early_init,
        .early_fini = goya_early_fini,
@@ -5536,7 +5598,7 @@ static const struct hl_asic_funcs goya_funcs = {
        .halt_engines = goya_halt_engines,
        .suspend = goya_suspend,
        .resume = goya_resume,
-       .cb_mmap = goya_cb_mmap,
+       .mmap = goya_mmap,
        .ring_doorbell = goya_ring_doorbell,
        .pqe_write = goya_pqe_write,
        .asic_dma_alloc_coherent = goya_dma_alloc_coherent,
@@ -5609,7 +5671,11 @@ static const struct hl_asic_funcs goya_funcs = {
        .enable_events_from_fw = goya_enable_events_from_fw,
        .map_pll_idx_to_fw_idx = goya_map_pll_idx_to_fw_idx,
        .init_firmware_loader = goya_init_firmware_loader,
-       .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram
+       .init_cpu_scrambler_dram = goya_cpu_init_scrambler_dram,
+       .state_dump_init = goya_state_dump_init,
+       .get_sob_addr = &goya_get_sob_addr,
+       .set_pci_memory_regions = goya_set_pci_memory_regions,
+       .get_stream_master_qid_arr = goya_get_stream_master_qid_arr,
 };
 
 /*
index 80b1d5a..9ff6a44 100644 (file)
@@ -98,6 +98,18 @@ struct hl_eq_fw_alive {
        __u8 pad[7];
 };
 
+enum hl_pcie_addr_dec_cause {
+       PCIE_ADDR_DEC_HBW_ERR_RESP,
+       PCIE_ADDR_DEC_LBW_ERR_RESP,
+       PCIE_ADDR_DEC_TLP_BLOCKED_BY_RR
+};
+
+struct hl_eq_pcie_addr_dec_data {
+       /* enum hl_pcie_addr_dec_cause */
+       __u8 addr_dec_cause;
+       __u8 pad[7];
+};
+
 struct hl_eq_entry {
        struct hl_eq_header hdr;
        union {
@@ -106,6 +118,7 @@ struct hl_eq_entry {
                struct hl_eq_sm_sei_data sm_sei_data;
                struct cpucp_pkt_sync_err pkt_sync_err;
                struct hl_eq_fw_alive fw_alive;
+               struct hl_eq_pcie_addr_dec_data pcie_addr_dec_data;
                __le64 data[7];
        };
 };
@@ -116,7 +129,7 @@ struct hl_eq_entry {
 #define EQ_CTL_READY_MASK              0x80000000
 
 #define EQ_CTL_EVENT_TYPE_SHIFT                16
-#define EQ_CTL_EVENT_TYPE_MASK         0x03FF0000
+#define EQ_CTL_EVENT_TYPE_MASK         0x0FFF0000
 
 #define EQ_CTL_INDEX_SHIFT             0
 #define EQ_CTL_INDEX_MASK              0x0000FFFF
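/* Illustrative helpers, assuming only the masks defined above: this mirrors
 * the extraction done by gaudi_handle_eqe()/goya_handle_eqe(), with the
 * event-type field now widened from 10 to 12 bits.
 */
static inline bool eq_ctl_is_ready(u32 ctl)
{
	return !!(ctl & EQ_CTL_READY_MASK);
}

static inline u16 eq_ctl_event_type(u32 ctl)
{
	return (ctl & EQ_CTL_EVENT_TYPE_MASK) >> EQ_CTL_EVENT_TYPE_SHIFT;
}

static inline u16 eq_ctl_index(u32 ctl)
{
	return (ctl & EQ_CTL_INDEX_MASK) >> EQ_CTL_INDEX_SHIFT;
}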
@@ -300,7 +313,7 @@ enum pq_init_status {
  *       The packet's arguments specify the desired sensor and the field to
  *       set.
  *
- * CPUCP_PACKET_PCIE_THROUGHPUT_GET
+ * CPUCP_PACKET_PCIE_THROUGHPUT_GET -
  *       Get throughput of PCIe.
  *       The packet's arguments specify the transaction direction (TX/RX).
  *       The window measurement is 10[msec], and the return value is in KB/sec.
@@ -309,19 +322,19 @@ enum pq_init_status {
  *       Replay count measures the number of "replay" events, which is
  *       basically the number of retries done by PCIe.
  *
- * CPUCP_PACKET_TOTAL_ENERGY_GET
+ * CPUCP_PACKET_TOTAL_ENERGY_GET -
  *       Total Energy is a measurement of energy from the time FW Linux
  *       is loaded. It is calculated by multiplying the average power
  *       by time (passed from armcp start). The units are in MilliJoules.
  *
- * CPUCP_PACKET_PLL_INFO_GET
+ * CPUCP_PACKET_PLL_INFO_GET -
  *       Fetch frequencies of PLL from the required PLL IP.
  *       The packet's arguments specify the device PLL type
  *       Pll type is the PLL from device pll_index enum.
  *       The result is composed of 4 outputs, each is 16-bit
  *       frequency in MHz.
  *
- * CPUCP_PACKET_POWER_GET
+ * CPUCP_PACKET_POWER_GET -
  *       Fetch the present power consumption of the device (Current * Voltage).
  *
  * CPUCP_PACKET_NIC_PFC_SET -
@@ -345,6 +358,24 @@ enum pq_init_status {
  * CPUCP_PACKET_MSI_INFO_SET -
  *       set the index number for each supported msi type going from
  *       host to device
+ *
+ * CPUCP_PACKET_NIC_XPCS91_REGS_GET -
+ *       Fetch the un/correctable counters values from the NIC MAC.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_GET -
+ *       Fetch various NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_CLR -
+ *       Clear the various NIC MAC counters in the NIC STAT.
+ *
+ * CPUCP_PACKET_NIC_STAT_REGS_ALL_GET -
+ *       Fetch all NIC MAC counters from the NIC STAT.
+ *
+ * CPUCP_PACKET_IS_IDLE_CHECK -
+ *       Check if the device is IDLE in regard to the DMA/compute engines
+ *       and QMANs. The f/w will return a bitmask where each bit represents
+ *       a different engine or QMAN according to enum cpucp_idle_mask.
+ *       The bit will be 1 if the engine is NOT idle.
  */
 
 enum cpucp_packet_id {
@@ -385,6 +416,11 @@ enum cpucp_packet_id {
        CPUCP_PACKET_NIC_LPBK_SET,              /* internal */
        CPUCP_PACKET_NIC_MAC_CFG,               /* internal */
        CPUCP_PACKET_MSI_INFO_SET,              /* internal */
+       CPUCP_PACKET_NIC_XPCS91_REGS_GET,       /* internal */
+       CPUCP_PACKET_NIC_STAT_REGS_GET,         /* internal */
+       CPUCP_PACKET_NIC_STAT_REGS_CLR,         /* internal */
+       CPUCP_PACKET_NIC_STAT_REGS_ALL_GET,     /* internal */
+       CPUCP_PACKET_IS_IDLE_CHECK,             /* internal */
 };
 
 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5
@@ -414,6 +450,11 @@ enum cpucp_packet_id {
 #define CPUCP_PKT_VAL_LPBK_IN2_SHIFT   1
 #define CPUCP_PKT_VAL_LPBK_IN2_MASK    0x000000000000001Eull
 
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_SHIFT        0
+#define CPUCP_PKT_VAL_MAC_CNT_IN1_MASK 0x0000000000000001ull
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_SHIFT        1
+#define CPUCP_PKT_VAL_MAC_CNT_IN2_MASK 0x00000000FFFFFFFEull
+
 /* heartbeat status bits */
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_SHIFT             0
 #define CPUCP_PKT_HB_STATUS_EQ_FAULT_MASK              0x00000001
@@ -467,7 +508,8 @@ struct cpucp_packet {
                __le32 status_mask;
        };
 
-       __le32 reserved;
+       /* For NIC requests */
+       __le32 port_index;
 };
 
 struct cpucp_unmask_irq_arr_packet {
@@ -476,6 +518,12 @@ struct cpucp_unmask_irq_arr_packet {
        __le32 irqs[0];
 };
 
+struct cpucp_nic_status_packet {
+       struct cpucp_packet cpucp_pkt;
+       __le32 length;
+       __le32 data[0];
+};
+
 struct cpucp_array_data_packet {
        struct cpucp_packet cpucp_pkt;
        __le32 length;
@@ -595,6 +643,18 @@ enum pll_index {
        PLL_MAX
 };
 
+enum rl_index {
+       TPC_RL = 0,
+       MME_RL,
+};
+
+enum pvt_index {
+       PVT_SW,
+       PVT_SE,
+       PVT_NW,
+       PVT_NE
+};
+
 /* Event Queue Packets */
 
 struct eq_generic_event {
@@ -700,6 +760,15 @@ struct cpucp_mac_addr {
        __u8 mac_addr[ETH_ALEN];
 };
 
+enum cpucp_serdes_type {
+       TYPE_1_SERDES_TYPE,
+       TYPE_2_SERDES_TYPE,
+       HLS1_SERDES_TYPE,
+       HLS1H_SERDES_TYPE,
+       UNKNOWN_SERDES_TYPE,
+       MAX_NUM_SERDES_TYPE = UNKNOWN_SERDES_TYPE
+};
+
 struct cpucp_nic_info {
        struct cpucp_mac_addr mac_addrs[CPUCP_MAX_NICS];
        __le64 link_mask[CPUCP_NIC_MASK_ARR_LEN];
@@ -708,6 +777,40 @@ struct cpucp_nic_info {
        __le64 link_ext_mask[CPUCP_NIC_MASK_ARR_LEN];
        __u8 qsfp_eeprom[CPUCP_NIC_QSFP_EEPROM_MAX_LEN];
        __le64 auto_neg_mask[CPUCP_NIC_MASK_ARR_LEN];
+       __le16 serdes_type; /* enum cpucp_serdes_type */
+       __u8 reserved[6];
+};
+
+/*
+ * struct cpucp_nic_status - describes the status of a NIC port.
+ * @port: NIC port index.
+ * @bad_format_cnt: e.g. CRC.
+ * @responder_out_of_sequence_psn_cnt: e.g. NAK.
+ * @high_ber_reinit: link reinit due to high BER.
+ * @correctable_err_cnt: e.g. bit-flip.
+ * @uncorrectable_err_cnt: e.g. MAC errors.
+ * @retraining_cnt: re-training counter.
+ * @up: is port up.
+ * @pcs_link: has PCS link.
+ * @phy_ready: is PHY ready.
+ * @auto_neg: is Autoneg enabled.
+ * @timeout_retransmission_cnt: timeout retransmission events
+ * @high_ber_cnt: high ber events
+ */
+struct cpucp_nic_status {
+       __le32 port;
+       __le32 bad_format_cnt;
+       __le32 responder_out_of_sequence_psn_cnt;
+       __le32 high_ber_reinit;
+       __le32 correctable_err_cnt;
+       __le32 uncorrectable_err_cnt;
+       __le32 retraining_cnt;
+       __u8 up;
+       __u8 pcs_link;
+       __u8 phy_ready;
+       __u8 auto_neg;
+       __le32 timeout_retransmission_cnt;
+       __le32 high_ber_cnt;
 };
 
 #endif /* CPUCP_IF_H */
index fa8a5ad..3099653 100644 (file)
  * CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL  Device is unusable and customer support
  *                                     should be contacted.
  *
+ * CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD        HALT ACK from ARC0 is not received
+ *                                     within specified retries after issuing
+ *                                     HALT request. ARC0 appears to be in bad
+ *                                     reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD        HALT ACK from ARC1 is not received
+ *                                     within specified retries after issuing
+ *                                     HALT request. ARC1 appears to be in bad
+ *                                     reset.
+ *
+ * CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD RUN ACK from ARC0 is not received
+ *                                     within specified timeout after issuing
+ *                                     RUN request. ARC0 appears to be in bad
+ *                                     reset.
+ *
+ * CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD RUN ACK from ARC1 is not received
+ *                                     within specified timeout after issuing
+ *                                     RUN request. ARC1 appears to be in bad
+ *                                     reset.
+ *
  * CPU_BOOT_ERR0_ENABLED               Error registers enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the error
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL         (1 << 11)
 #define CPU_BOOT_ERR0_PLL_FAIL                 (1 << 12)
 #define CPU_BOOT_ERR0_DEVICE_UNUSABLE_FAIL     (1 << 13)
+#define CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD   (1 << 14)
+#define CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD   (1 << 15)
+#define CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD    (1 << 16)
+#define CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD    (1 << 17)
 #define CPU_BOOT_ERR0_ENABLED                  (1 << 31)
 #define CPU_BOOT_ERR1_ENABLED                  (1 << 31)
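/* Illustrative sketch, assuming only the bit definitions above: the new ARC
 * handshake failure bits are meaningful only once the FW has flagged the
 * error register as populated via CPU_BOOT_ERR0_ENABLED.
 */
static void report_arc_handshake_errors(struct hl_device *hdev, u32 err0)
{
	if (!(err0 & CPU_BOOT_ERR0_ENABLED))
		return;	/* FW did not populate the error register */

	if (err0 & CPU_BOOT_ERR0_ARC0_HALT_ACK_NOT_RCVD)
		dev_err(hdev->dev, "no HALT ACK from ARC0\n");
	if (err0 & CPU_BOOT_ERR0_ARC1_HALT_ACK_NOT_RCVD)
		dev_err(hdev->dev, "no HALT ACK from ARC1\n");
	if (err0 & CPU_BOOT_ERR0_ARC0_RUN_ACK_NOT_RCVD)
		dev_err(hdev->dev, "no RUN ACK from ARC0\n");
	if (err0 & CPU_BOOT_ERR0_ARC1_RUN_ACK_NOT_RCVD)
		dev_err(hdev->dev, "no RUN ACK from ARC1\n");
}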
 
  *                                     configured and is ready for use.
  *                                     Initialized in: ppboot
  *
+ * CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN     NIC MAC channels init is done by FW and
+ *                                     any access to them is done via the FW.
+ *                                     Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_DYN_PLL_EN                Dynamic PLL configuration is enabled.
  *                                     FW sends to host a bitmap of supported
  *                                     PLLs.
  *                                     prevent IRQs overriding each other.
  *                                     Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN
+ *                                     NIC STAT and XPCS91 access is restricted
+ *                                     and is done via FW only.
+ *                                     Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN
+ *                                     NIC STAT get all is supported.
+ *                                     Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN
+ *                                     F/W checks if the device is idle by reading a defined set
+ *                                     of registers. It returns a bitmask of all the engines,
+ *                                     where a bit is set if the engine is not idle.
+ *                                     Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED           Device status register enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the device status
 #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN                        (1 << 15)
 #define CPU_BOOT_DEV_STS0_FW_LD_COM_EN                 (1 << 16)
 #define CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN              (1 << 17)
+#define CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN                        (1 << 18)
 #define CPU_BOOT_DEV_STS0_DYN_PLL_EN                   (1 << 19)
 #define CPU_BOOT_DEV_STS0_GIC_PRIVILEGED_EN            (1 << 20)
 #define CPU_BOOT_DEV_STS0_EQ_INDEX_EN                  (1 << 21)
 #define CPU_BOOT_DEV_STS0_MULTI_IRQ_POLL_EN            (1 << 22)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_XPCS91_EN                (1 << 23)
+#define CPU_BOOT_DEV_STS0_FW_NIC_STAT_EXT_EN           (1 << 24)
+#define CPU_BOOT_DEV_STS0_IS_IDLE_CHECK_EN             (1 << 25)
 #define CPU_BOOT_DEV_STS0_ENABLED                      (1 << 31)
 #define CPU_BOOT_DEV_STS1_ENABLED                      (1 << 31)
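/* Illustrative sketch: feature bits in CPU_BOOT_DEV_STS0 (such as the new
 * FW_NIC_MAC_EN or IS_IDLE_CHECK_EN capabilities) are only meaningful once
 * the FW has set CPU_BOOT_DEV_STS0_ENABLED for the register itself.
 */
static bool fw_cap_is_set(u32 dev_sts0, u32 cap_bit)
{
	if (!(dev_sts0 & CPU_BOOT_DEV_STS0_ENABLED))
		return false;

	return !!(dev_sts0 & cap_bit);
}

/* e.g. fw_cap_is_set(sts0, CPU_BOOT_DEV_STS0_FW_NIC_MAC_EN) */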
 
@@ -313,10 +360,7 @@ struct cpu_dyn_regs {
        __le32 hw_state;
        __le32 kmd_msg_to_cpu;
        __le32 cpu_cmd_status_to_host;
-       union {
-               __le32 gic_host_irq_ctrl;
-               __le32 gic_host_pi_upd_irq;
-       };
+       __le32 gic_host_pi_upd_irq;
        __le32 gic_tpc_qm_irq_ctrl;
        __le32 gic_mme_qm_irq_ctrl;
        __le32 gic_dma_qm_irq_ctrl;
@@ -324,7 +368,9 @@ struct cpu_dyn_regs {
        __le32 gic_dma_core_irq_ctrl;
        __le32 gic_host_halt_irq;
        __le32 gic_host_ints_irq;
-       __le32 reserved1[24];           /* reserve for future use */
+       __le32 gic_host_soft_rst_irq;
+       __le32 gic_rot_qm_irq_ctrl;
+       __le32 reserved1[22];           /* reserve for future use */
 };
 
 /* TODO: remove the desc magic after the code is updated to use message */
@@ -462,6 +508,11 @@ struct lkd_fw_comms_msg {
  *                             Do not wait for BMC response.
  *
  * COMMS_LOW_PLL_OPP           Initialize PLLs for low OPP.
+ *
+ * COMMS_PREP_DESC_ELBI                Same as COMMS_PREP_DESC, except that the memory
+ *                             space is allocated in an ELBI access-only
+ *                             address range.
+ *
  */
 enum comms_cmd {
        COMMS_NOOP = 0,
@@ -474,6 +525,7 @@ enum comms_cmd {
        COMMS_GOTO_WFE = 7,
        COMMS_SKIP_BMC = 8,
        COMMS_LOW_PLL_OPP = 9,
+       COMMS_PREP_DESC_ELBI = 10,
        COMMS_INVLD_LAST
 };
 
index 5bb54b3..ffdfbd9 100644 (file)
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_1                     0x4F2004
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_2047                  0x4F3FFC
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRL_0               0x4F4000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_ADDRH_0               0x4F4800
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_PAY_DATA_0                0x4F5000
+#define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_ARM_0                     0x4F5800
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0                  0x4F6000
 #define mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_511                0x4F67FC
 
index 9aea7e9..acc85d3 100644 (file)
@@ -449,4 +449,21 @@ enum axi_id {
 #define PCIE_AUX_FLR_CTRL_HW_CTRL_MASK                               0x1
 #define PCIE_AUX_FLR_CTRL_INT_MASK_MASK                              0x2
 
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_SHIFT        0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_VALID_MASK         0x1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_SHIFT      1
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0_PENDING_MASK       0x1FE
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_SHIFT             0
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SID_MASK              0xFF
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_SHIFT            8
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_MASK_MASK             0xFF00
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_SHIFT             16
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOP_MASK              0x10000
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_SHIFT             17
+#define SYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_ARM_0_SOD_MASK              0xFFFE0000
+#define TPC0_QM_CP_STS_0_FENCE_ID_SHIFT                              20
+#define TPC0_QM_CP_STS_0_FENCE_ID_MASK                               0x300000
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_SHIFT                     22
+#define TPC0_QM_CP_STS_0_FENCE_IN_PROGRESS_MASK                      0x400000
+
 #endif /* GAUDI_MASKS_H_ */
index d95d416..b9bd5a7 100644 (file)
@@ -12,8 +12,6 @@
  * PSOC scratch-pad registers
  */
 #define mmHW_STATE                     mmPSOC_GLOBAL_CONF_SCRATCHPAD_0
-/* TODO: remove mmGIC_HOST_IRQ_CTRL_POLL_REG */
-#define mmGIC_HOST_IRQ_CTRL_POLL_REG   mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_HOST_PI_UPD_IRQ_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_1
 #define mmGIC_TPC_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_2
 #define mmGIC_MME_QM_IRQ_CTRL_POLL_REG mmPSOC_GLOBAL_CONF_SCRATCHPAD_3
index 9251441..7f473f9 100644 (file)
@@ -346,29 +346,45 @@ static bool preamble_next(struct nvdimm_drvdata *ndd,
                        free, nslot);
 }
 
+static bool nsl_validate_checksum(struct nvdimm_drvdata *ndd,
+                                 struct nd_namespace_label *nd_label)
+{
+       u64 sum, sum_save;
+
+       if (!namespace_label_has(ndd, checksum))
+               return true;
+
+       sum_save = nsl_get_checksum(ndd, nd_label);
+       nsl_set_checksum(ndd, nd_label, 0);
+       sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+       nsl_set_checksum(ndd, nd_label, sum_save);
+       return sum == sum_save;
+}
+
+static void nsl_calculate_checksum(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label)
+{
+       u64 sum;
+
+       if (!namespace_label_has(ndd, checksum))
+               return;
+       nsl_set_checksum(ndd, nd_label, 0);
+       sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
+       nsl_set_checksum(ndd, nd_label, sum);
+}
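/* Illustrative pairing of the two helpers above, not an addition to the
 * patch: the write paths (__pmem_label_update()/__blk_label_update()) seal
 * a label with nsl_calculate_checksum() just before pushing it to the DIMM,
 * and slot_valid() later verifies it with nsl_validate_checksum() when
 * labels are scanned back in.
 */
static bool label_checksum_roundtrip(struct nvdimm_drvdata *ndd,
				     struct nd_namespace_label *nd_label)
{
	nsl_calculate_checksum(ndd, nd_label);		/* write side */
	return nsl_validate_checksum(ndd, nd_label);	/* read side: true */
}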
+
 static bool slot_valid(struct nvdimm_drvdata *ndd,
                struct nd_namespace_label *nd_label, u32 slot)
 {
+       bool valid;
+
        /* check that we are written where we expect to be written */
-       if (slot != __le32_to_cpu(nd_label->slot))
+       if (slot != nsl_get_slot(ndd, nd_label))
                return false;
-
-       /* check checksum */
-       if (namespace_label_has(ndd, checksum)) {
-               u64 sum, sum_save;
-
-               sum_save = __le64_to_cpu(nd_label->checksum);
-               nd_label->checksum = __cpu_to_le64(0);
-               sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
-               nd_label->checksum = __cpu_to_le64(sum_save);
-               if (sum != sum_save) {
-                       dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
-                               slot, sum);
-                       return false;
-               }
-       }
-
-       return true;
+       valid = nsl_validate_checksum(ndd, nd_label);
+       if (!valid)
+               dev_dbg(ndd->dev, "fail checksum. slot: %d\n", slot);
+       return valid;
 }
 
 int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
@@ -395,13 +411,13 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
                        continue;
 
                memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
-               flags = __le32_to_cpu(nd_label->flags);
+               flags = nsl_get_flags(ndd, nd_label);
                if (test_bit(NDD_NOBLK, &nvdimm->flags))
                        flags &= ~NSLABEL_FLAG_LOCAL;
                nd_label_gen_id(&label_id, label_uuid, flags);
                res = nvdimm_allocate_dpa(ndd, &label_id,
-                               __le64_to_cpu(nd_label->dpa),
-                               __le64_to_cpu(nd_label->rawsize));
+                                         nsl_get_dpa(ndd, nd_label),
+                                         nsl_get_rawsize(ndd, nd_label));
                nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
                if (!res)
                        return -EBUSY;
@@ -548,9 +564,9 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
                nd_label = to_label(ndd, slot);
 
                if (!slot_valid(ndd, nd_label, slot)) {
-                       u32 label_slot = __le32_to_cpu(nd_label->slot);
-                       u64 size = __le64_to_cpu(nd_label->rawsize);
-                       u64 dpa = __le64_to_cpu(nd_label->dpa);
+                       u32 label_slot = nsl_get_slot(ndd, nd_label);
+                       u64 size = nsl_get_rawsize(ndd, nd_label);
+                       u64 dpa = nsl_get_dpa(ndd, nd_label);
 
                        dev_dbg(ndd->dev,
                                "slot%d invalid slot: %d dpa: %llx size: %llx\n",
@@ -708,7 +724,7 @@ static unsigned long nd_label_offset(struct nvdimm_drvdata *ndd,
                - (unsigned long) to_namespace_index(ndd, 0);
 }
 
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
+static enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid)
 {
        if (guid_equal(guid, &nvdimm_btt_guid))
                return NVDIMM_CCLASS_BTT;
@@ -756,6 +772,45 @@ static void reap_victim(struct nd_mapping *nd_mapping,
        victim->label = NULL;
 }
 
+static void nsl_set_type_guid(struct nvdimm_drvdata *ndd,
+                             struct nd_namespace_label *nd_label, guid_t *guid)
+{
+       if (namespace_label_has(ndd, type_guid))
+               guid_copy(&nd_label->type_guid, guid);
+}
+
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+                           struct nd_namespace_label *nd_label, guid_t *guid)
+{
+       if (!namespace_label_has(ndd, type_guid))
+               return true;
+       if (!guid_equal(&nd_label->type_guid, guid)) {
+               dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n", guid,
+                       &nd_label->type_guid);
+               return false;
+       }
+       return true;
+}
+
+static void nsl_set_claim_class(struct nvdimm_drvdata *ndd,
+                               struct nd_namespace_label *nd_label,
+                               enum nvdimm_claim_class claim_class)
+{
+       if (!namespace_label_has(ndd, abstraction_guid))
+               return;
+       guid_copy(&nd_label->abstraction_guid,
+                 to_abstraction_guid(claim_class,
+                                     &nd_label->abstraction_guid));
+}
+
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+                                           struct nd_namespace_label *nd_label)
+{
+       if (!namespace_label_has(ndd, abstraction_guid))
+               return NVDIMM_CCLASS_NONE;
+       return to_nvdimm_cclass(&nd_label->abstraction_guid);
+}
+
 static int __pmem_label_update(struct nd_region *nd_region,
                struct nd_mapping *nd_mapping, struct nd_namespace_pmem *nspm,
                int pos, unsigned long flags)
@@ -797,29 +852,18 @@ static int __pmem_label_update(struct nd_region *nd_region,
        nd_label = to_label(ndd, slot);
        memset(nd_label, 0, sizeof_namespace_label(ndd));
        memcpy(nd_label->uuid, nspm->uuid, NSLABEL_UUID_LEN);
-       if (nspm->alt_name)
-               memcpy(nd_label->name, nspm->alt_name, NSLABEL_NAME_LEN);
-       nd_label->flags = __cpu_to_le32(flags);
-       nd_label->nlabel = __cpu_to_le16(nd_region->ndr_mappings);
-       nd_label->position = __cpu_to_le16(pos);
-       nd_label->isetcookie = __cpu_to_le64(cookie);
-       nd_label->rawsize = __cpu_to_le64(resource_size(res));
-       nd_label->lbasize = __cpu_to_le64(nspm->lbasize);
-       nd_label->dpa = __cpu_to_le64(res->start);
-       nd_label->slot = __cpu_to_le32(slot);
-       if (namespace_label_has(ndd, type_guid))
-               guid_copy(&nd_label->type_guid, &nd_set->type_guid);
-       if (namespace_label_has(ndd, abstraction_guid))
-               guid_copy(&nd_label->abstraction_guid,
-                               to_abstraction_guid(ndns->claim_class,
-                                       &nd_label->abstraction_guid));
-       if (namespace_label_has(ndd, checksum)) {
-               u64 sum;
-
-               nd_label->checksum = __cpu_to_le64(0);
-               sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
-               nd_label->checksum = __cpu_to_le64(sum);
-       }
+       nsl_set_name(ndd, nd_label, nspm->alt_name);
+       nsl_set_flags(ndd, nd_label, flags);
+       nsl_set_nlabel(ndd, nd_label, nd_region->ndr_mappings);
+       nsl_set_position(ndd, nd_label, pos);
+       nsl_set_isetcookie(ndd, nd_label, cookie);
+       nsl_set_rawsize(ndd, nd_label, resource_size(res));
+       nsl_set_lbasize(ndd, nd_label, nspm->lbasize);
+       nsl_set_dpa(ndd, nd_label, res->start);
+       nsl_set_slot(ndd, nd_label, slot);
+       nsl_set_type_guid(ndd, nd_label, &nd_set->type_guid);
+       nsl_set_claim_class(ndd, nd_label, ndns->claim_class);
+       nsl_calculate_checksum(ndd, nd_label);
        nd_dbg_dpa(nd_region, ndd, res, "\n");
 
        /* update label */
@@ -879,9 +923,9 @@ static struct resource *to_resource(struct nvdimm_drvdata *ndd,
        struct resource *res;
 
        for_each_dpa_resource(ndd, res) {
-               if (res->start != __le64_to_cpu(nd_label->dpa))
+               if (res->start != nsl_get_dpa(ndd, nd_label))
                        continue;
-               if (resource_size(res) != __le64_to_cpu(nd_label->rawsize))
+               if (resource_size(res) != nsl_get_rawsize(ndd, nd_label))
                        continue;
                return res;
        }
@@ -889,6 +933,59 @@ static struct resource *to_resource(struct nvdimm_drvdata *ndd,
        return NULL;
 }
 
+/*
+ * Use the presence of the type_guid as a flag to determine isetcookie
+ * usage and nlabel + position policy for blk-aperture namespaces.
+ */
+static void nsl_set_blk_isetcookie(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label,
+                                  u64 isetcookie)
+{
+       if (namespace_label_has(ndd, type_guid)) {
+               nsl_set_isetcookie(ndd, nd_label, isetcookie);
+               return;
+       }
+       nsl_set_isetcookie(ndd, nd_label, 0); /* N/A */
+}
+
+bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
+                                struct nd_namespace_label *nd_label,
+                                u64 isetcookie)
+{
+       if (!namespace_label_has(ndd, type_guid))
+               return true;
+
+       if (nsl_get_isetcookie(ndd, nd_label) != isetcookie) {
+               dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n", isetcookie,
+                       nsl_get_isetcookie(ndd, nd_label));
+               return false;
+       }
+
+       return true;
+}
+
+static void nsl_set_blk_nlabel(struct nvdimm_drvdata *ndd,
+                              struct nd_namespace_label *nd_label, int nlabel,
+                              bool first)
+{
+       if (!namespace_label_has(ndd, type_guid)) {
+               nsl_set_nlabel(ndd, nd_label, 0); /* N/A */
+               return;
+       }
+       nsl_set_nlabel(ndd, nd_label, first ? nlabel : 0xffff);
+}
+
+static void nsl_set_blk_position(struct nvdimm_drvdata *ndd,
+                                struct nd_namespace_label *nd_label,
+                                bool first)
+{
+       if (!namespace_label_has(ndd, type_guid)) {
+               nsl_set_position(ndd, nd_label, 0);
+               return;
+       }
+       nsl_set_position(ndd, nd_label, first ? 0 : 0xffff);
+}
+
 /*
  * 1/ Account all the labels that can be freed after this update
  * 2/ Allocate and write the label to the staging (next) index
@@ -1017,50 +1114,21 @@ static int __blk_label_update(struct nd_region *nd_region,
                nd_label = to_label(ndd, slot);
                memset(nd_label, 0, sizeof_namespace_label(ndd));
                memcpy(nd_label->uuid, nsblk->uuid, NSLABEL_UUID_LEN);
-               if (nsblk->alt_name)
-                       memcpy(nd_label->name, nsblk->alt_name,
-                                       NSLABEL_NAME_LEN);
-               nd_label->flags = __cpu_to_le32(NSLABEL_FLAG_LOCAL);
-
-               /*
-                * Use the presence of the type_guid as a flag to
-                * determine isetcookie usage and nlabel + position
-                * policy for blk-aperture namespaces.
-                */
-               if (namespace_label_has(ndd, type_guid)) {
-                       if (i == min_dpa_idx) {
-                               nd_label->nlabel = __cpu_to_le16(nsblk->num_resources);
-                               nd_label->position = __cpu_to_le16(0);
-                       } else {
-                               nd_label->nlabel = __cpu_to_le16(0xffff);
-                               nd_label->position = __cpu_to_le16(0xffff);
-                       }
-                       nd_label->isetcookie = __cpu_to_le64(nd_set->cookie2);
-               } else {
-                       nd_label->nlabel = __cpu_to_le16(0); /* N/A */
-                       nd_label->position = __cpu_to_le16(0); /* N/A */
-                       nd_label->isetcookie = __cpu_to_le64(0); /* N/A */
-               }
-
-               nd_label->dpa = __cpu_to_le64(res->start);
-               nd_label->rawsize = __cpu_to_le64(resource_size(res));
-               nd_label->lbasize = __cpu_to_le64(nsblk->lbasize);
-               nd_label->slot = __cpu_to_le32(slot);
-               if (namespace_label_has(ndd, type_guid))
-                       guid_copy(&nd_label->type_guid, &nd_set->type_guid);
-               if (namespace_label_has(ndd, abstraction_guid))
-                       guid_copy(&nd_label->abstraction_guid,
-                                       to_abstraction_guid(ndns->claim_class,
-                                               &nd_label->abstraction_guid));
-
-               if (namespace_label_has(ndd, checksum)) {
-                       u64 sum;
-
-                       nd_label->checksum = __cpu_to_le64(0);
-                       sum = nd_fletcher64(nd_label,
-                                       sizeof_namespace_label(ndd), 1);
-                       nd_label->checksum = __cpu_to_le64(sum);
-               }
+               nsl_set_name(ndd, nd_label, nsblk->alt_name);
+               nsl_set_flags(ndd, nd_label, NSLABEL_FLAG_LOCAL);
+
+               nsl_set_blk_nlabel(ndd, nd_label, nsblk->num_resources,
+                                  i == min_dpa_idx);
+               nsl_set_blk_position(ndd, nd_label, i == min_dpa_idx);
+               nsl_set_blk_isetcookie(ndd, nd_label, nd_set->cookie2);
+
+               nsl_set_dpa(ndd, nd_label, res->start);
+               nsl_set_rawsize(ndd, nd_label, resource_size(res));
+               nsl_set_lbasize(ndd, nd_label, nsblk->lbasize);
+               nsl_set_slot(ndd, nd_label, slot);
+               nsl_set_type_guid(ndd, nd_label, &nd_set->type_guid);
+               nsl_set_claim_class(ndd, nd_label, ndns->claim_class);
+               nsl_calculate_checksum(ndd, nd_label);
 
                /* update label */
                offset = nd_label_offset(ndd, nd_label);
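
The nsl_calculate_checksum() call above stands in for the open-coded fletcher64 sequence this hunk removes. A minimal sketch of what that helper boils down to, inferred from the removed lines and the nsl_set_checksum() accessor added further down (the actual definition lives elsewhere in this series):

        static void nsl_calculate_checksum(struct nvdimm_drvdata *ndd,
                                           struct nd_namespace_label *nd_label)
        {
                u64 sum;

                /* Labels without a checksum field have nothing to update. */
                if (!namespace_label_has(ndd, checksum))
                        return;
                nsl_set_checksum(ndd, nd_label, 0);
                sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
                nsl_set_checksum(ndd, nd_label, sum);
        }
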
index 956b6d1..31f94fa 100644 (file)
@@ -135,7 +135,6 @@ struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
 u32 nd_label_alloc_slot(struct nvdimm_drvdata *ndd);
 bool nd_label_free_slot(struct nvdimm_drvdata *ndd, u32 slot);
 u32 nd_label_nfree(struct nvdimm_drvdata *ndd);
-enum nvdimm_claim_class to_nvdimm_cclass(guid_t *guid);
 struct nd_region;
 struct nd_namespace_pmem;
 struct nd_namespace_blk;
index 7454782..4cec171 100644 (file)
@@ -1235,7 +1235,7 @@ static int namespace_update_uuid(struct nd_region *nd_region,
                        if (!nd_label)
                                continue;
                        nd_label_gen_id(&label_id, nd_label->uuid,
-                                       __le32_to_cpu(nd_label->flags));
+                                       nsl_get_flags(ndd, nd_label));
                        if (strcmp(old_label_id.id, label_id.id) == 0)
                                set_bit(ND_LABEL_REAP, &label_ent->flags);
                }
@@ -1847,28 +1847,21 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
                list_for_each_entry(label_ent, &nd_mapping->labels, list) {
                        struct nd_namespace_label *nd_label = label_ent->label;
                        u16 position, nlabel;
-                       u64 isetcookie;
 
                        if (!nd_label)
                                continue;
-                       isetcookie = __le64_to_cpu(nd_label->isetcookie);
-                       position = __le16_to_cpu(nd_label->position);
-                       nlabel = __le16_to_cpu(nd_label->nlabel);
+                       position = nsl_get_position(ndd, nd_label);
+                       nlabel = nsl_get_nlabel(ndd, nd_label);
 
-                       if (isetcookie != cookie)
+                       if (!nsl_validate_isetcookie(ndd, nd_label, cookie))
                                continue;
 
                        if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
                                continue;
 
-                       if (namespace_label_has(ndd, type_guid)
-                                       && !guid_equal(&nd_set->type_guid,
-                                               &nd_label->type_guid)) {
-                               dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-                                               &nd_set->type_guid,
-                                               &nd_label->type_guid);
+                       if (!nsl_validate_type_guid(ndd, nd_label,
+                                                   &nd_set->type_guid))
                                continue;
-                       }
 
                        if (found_uuid) {
                                dev_dbg(ndd->dev, "duplicate entry for uuid\n");
@@ -1923,8 +1916,8 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
                 */
                hw_start = nd_mapping->start;
                hw_end = hw_start + nd_mapping->size;
-               pmem_start = __le64_to_cpu(nd_label->dpa);
-               pmem_end = pmem_start + __le64_to_cpu(nd_label->rawsize);
+               pmem_start = nsl_get_dpa(ndd, nd_label);
+               pmem_end = pmem_start + nsl_get_rawsize(ndd, nd_label);
                if (pmem_start >= hw_start && pmem_start < hw_end
                                && pmem_end <= hw_end && pmem_end > hw_start)
                        /* pass */;
@@ -1947,14 +1940,16 @@ static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
  * @nd_label: target pmem namespace label to evaluate
  */
 static struct device *create_namespace_pmem(struct nd_region *nd_region,
-               struct nd_namespace_index *nsindex,
-               struct nd_namespace_label *nd_label)
+                                           struct nd_mapping *nd_mapping,
+                                           struct nd_namespace_label *nd_label)
 {
+       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+       struct nd_namespace_index *nsindex =
+               to_namespace_index(ndd, ndd->ns_current);
        u64 cookie = nd_region_interleave_set_cookie(nd_region, nsindex);
        u64 altcookie = nd_region_interleave_set_altcookie(nd_region);
        struct nd_label_ent *label_ent;
        struct nd_namespace_pmem *nspm;
-       struct nd_mapping *nd_mapping;
        resource_size_t size = 0;
        struct resource *res;
        struct device *dev;
@@ -1966,10 +1961,10 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
                return ERR_PTR(-ENXIO);
        }
 
-       if (__le64_to_cpu(nd_label->isetcookie) != cookie) {
+       if (!nsl_validate_isetcookie(ndd, nd_label, cookie)) {
                dev_dbg(&nd_region->dev, "invalid cookie in label: %pUb\n",
                                nd_label->uuid);
-               if (__le64_to_cpu(nd_label->isetcookie) != altcookie)
+               if (!nsl_validate_isetcookie(ndd, nd_label, altcookie))
                        return ERR_PTR(-EAGAIN);
 
                dev_dbg(&nd_region->dev, "valid altcookie in label: %pUb\n",
@@ -2037,20 +2032,18 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
                        continue;
                }
 
-               size += __le64_to_cpu(label0->rawsize);
-               if (__le16_to_cpu(label0->position) != 0)
+               ndd = to_ndd(nd_mapping);
+               size += nsl_get_rawsize(ndd, label0);
+               if (nsl_get_position(ndd, label0) != 0)
                        continue;
                WARN_ON(nspm->alt_name || nspm->uuid);
-               nspm->alt_name = kmemdup((void __force *) label0->name,
-                               NSLABEL_NAME_LEN, GFP_KERNEL);
+               nspm->alt_name = kmemdup(nsl_ref_name(ndd, label0),
+                                        NSLABEL_NAME_LEN, GFP_KERNEL);
                nspm->uuid = kmemdup((void __force *) label0->uuid,
                                NSLABEL_UUID_LEN, GFP_KERNEL);
-               nspm->lbasize = __le64_to_cpu(label0->lbasize);
-               ndd = to_ndd(nd_mapping);
-               if (namespace_label_has(ndd, abstraction_guid))
-                       nspm->nsio.common.claim_class
-                               = to_nvdimm_cclass(&label0->abstraction_guid);
-
+               nspm->lbasize = nsl_get_lbasize(ndd, label0);
+               nspm->nsio.common.claim_class =
+                       nsl_get_claim_class(ndd, label0);
        }
 
        if (!nspm->alt_name || !nspm->uuid) {
@@ -2237,7 +2230,7 @@ static int add_namespace_resource(struct nd_region *nd_region,
                if (is_namespace_blk(devs[i])) {
                        res = nsblk_add_resource(nd_region, ndd,
                                        to_nd_namespace_blk(devs[i]),
-                                       __le64_to_cpu(nd_label->dpa));
+                                       nsl_get_dpa(ndd, nd_label));
                        if (!res)
                                return -ENXIO;
                        nd_dbg_dpa(nd_region, ndd, res, "%d assign\n", count);
@@ -2265,21 +2258,10 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
        struct device *dev = NULL;
        struct resource *res;
 
-       if (namespace_label_has(ndd, type_guid)) {
-               if (!guid_equal(&nd_set->type_guid, &nd_label->type_guid)) {
-                       dev_dbg(ndd->dev, "expect type_guid %pUb got %pUb\n",
-                                       &nd_set->type_guid,
-                                       &nd_label->type_guid);
-                       return ERR_PTR(-EAGAIN);
-               }
-
-               if (nd_label->isetcookie != __cpu_to_le64(nd_set->cookie2)) {
-                       dev_dbg(ndd->dev, "expect cookie %#llx got %#llx\n",
-                                       nd_set->cookie2,
-                                       __le64_to_cpu(nd_label->isetcookie));
-                       return ERR_PTR(-EAGAIN);
-               }
-       }
+       if (!nsl_validate_type_guid(ndd, nd_label, &nd_set->type_guid))
+               return ERR_PTR(-EAGAIN);
+       if (!nsl_validate_blk_isetcookie(ndd, nd_label, nd_set->cookie2))
+               return ERR_PTR(-EAGAIN);
 
        nsblk = kzalloc(sizeof(*nsblk), GFP_KERNEL);
        if (!nsblk)
@@ -2288,23 +2270,19 @@ static struct device *create_namespace_blk(struct nd_region *nd_region,
        dev->type = &namespace_blk_device_type;
        dev->parent = &nd_region->dev;
        nsblk->id = -1;
-       nsblk->lbasize = __le64_to_cpu(nd_label->lbasize);
-       nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN,
-                       GFP_KERNEL);
-       if (namespace_label_has(ndd, abstraction_guid))
-               nsblk->common.claim_class
-                       = to_nvdimm_cclass(&nd_label->abstraction_guid);
+       nsblk->lbasize = nsl_get_lbasize(ndd, nd_label);
+       nsblk->uuid = kmemdup(nd_label->uuid, NSLABEL_UUID_LEN, GFP_KERNEL);
+       nsblk->common.claim_class = nsl_get_claim_class(ndd, nd_label);
        if (!nsblk->uuid)
                goto blk_err;
-       memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+       nsl_get_name(ndd, nd_label, name);
        if (name[0]) {
-               nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN,
-                               GFP_KERNEL);
+               nsblk->alt_name = kmemdup(name, NSLABEL_NAME_LEN, GFP_KERNEL);
                if (!nsblk->alt_name)
                        goto blk_err;
        }
        res = nsblk_add_resource(nd_region, ndd, nsblk,
-                       __le64_to_cpu(nd_label->dpa));
+                       nsl_get_dpa(ndd, nd_label));
        if (!res)
                goto blk_err;
        nd_dbg_dpa(nd_region, ndd, res, "%d: assign\n", count);
@@ -2345,6 +2323,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
        struct device *dev, **devs = NULL;
        struct nd_label_ent *label_ent, *e;
        struct nd_mapping *nd_mapping = &nd_region->mapping[0];
+       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
        resource_size_t map_end = nd_mapping->start + nd_mapping->size - 1;
 
        /* "safe" because create_namespace_pmem() might list_move() label_ent */
@@ -2355,7 +2334,7 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
                if (!nd_label)
                        continue;
-               flags = __le32_to_cpu(nd_label->flags);
+               flags = nsl_get_flags(ndd, nd_label);
                if (is_nd_blk(&nd_region->dev)
                                == !!(flags & NSLABEL_FLAG_LOCAL))
                        /* pass, region matches label type */;
@@ -2363,9 +2342,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
                        continue;
 
                /* skip labels that describe extents outside of the region */
-               if (__le64_to_cpu(nd_label->dpa) < nd_mapping->start ||
-                   __le64_to_cpu(nd_label->dpa) > map_end)
-                               continue;
+               if (nsl_get_dpa(ndd, nd_label) < nd_mapping->start ||
+                   nsl_get_dpa(ndd, nd_label) > map_end)
+                       continue;
 
                i = add_namespace_resource(nd_region, nd_label, devs, count);
                if (i < 0)
@@ -2381,13 +2360,9 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
                if (is_nd_blk(&nd_region->dev))
                        dev = create_namespace_blk(nd_region, nd_label, count);
-               else {
-                       struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
-                       struct nd_namespace_index *nsindex;
-
-                       nsindex = to_namespace_index(ndd, ndd->ns_current);
-                       dev = create_namespace_pmem(nd_region, nsindex, nd_label);
-               }
+               else
+                       dev = create_namespace_pmem(nd_region, nd_mapping,
+                                                   nd_label);
 
                if (IS_ERR(dev)) {
                        switch (PTR_ERR(dev)) {
@@ -2571,10 +2546,10 @@ static int init_active_labels(struct nd_region *nd_region)
                                break;
                        label = nd_label_active(ndd, j);
                        if (test_bit(NDD_NOBLK, &nvdimm->flags)) {
-                               u32 flags = __le32_to_cpu(label->flags);
+                               u32 flags = nsl_get_flags(ndd, label);
 
                                flags &= ~NSLABEL_FLAG_LOCAL;
-                               label->flags = __cpu_to_le32(flags);
+                               nsl_set_flags(ndd, label, flags);
                        }
                        label_ent->label = label;
 
index 696b555..5467ebb 100644 (file)
@@ -35,6 +35,156 @@ struct nvdimm_drvdata {
        struct kref kref;
 };
 
+static inline const u8 *nsl_ref_name(struct nvdimm_drvdata *ndd,
+                                    struct nd_namespace_label *nd_label)
+{
+       return nd_label->name;
+}
+
+static inline u8 *nsl_get_name(struct nvdimm_drvdata *ndd,
+                              struct nd_namespace_label *nd_label, u8 *name)
+{
+       return memcpy(name, nd_label->name, NSLABEL_NAME_LEN);
+}
+
+static inline u8 *nsl_set_name(struct nvdimm_drvdata *ndd,
+                              struct nd_namespace_label *nd_label, u8 *name)
+{
+       if (!name)
+               return NULL;
+       return memcpy(nd_label->name, name, NSLABEL_NAME_LEN);
+}
+
+static inline u32 nsl_get_slot(struct nvdimm_drvdata *ndd,
+                              struct nd_namespace_label *nd_label)
+{
+       return __le32_to_cpu(nd_label->slot);
+}
+
+static inline void nsl_set_slot(struct nvdimm_drvdata *ndd,
+                               struct nd_namespace_label *nd_label, u32 slot)
+{
+       nd_label->slot = __cpu_to_le32(slot);
+}
+
+static inline u64 nsl_get_checksum(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label)
+{
+       return __le64_to_cpu(nd_label->checksum);
+}
+
+static inline void nsl_set_checksum(struct nvdimm_drvdata *ndd,
+                                   struct nd_namespace_label *nd_label,
+                                   u64 checksum)
+{
+       nd_label->checksum = __cpu_to_le64(checksum);
+}
+
+static inline u32 nsl_get_flags(struct nvdimm_drvdata *ndd,
+                               struct nd_namespace_label *nd_label)
+{
+       return __le32_to_cpu(nd_label->flags);
+}
+
+static inline void nsl_set_flags(struct nvdimm_drvdata *ndd,
+                                struct nd_namespace_label *nd_label, u32 flags)
+{
+       nd_label->flags = __cpu_to_le32(flags);
+}
+
+static inline u64 nsl_get_dpa(struct nvdimm_drvdata *ndd,
+                             struct nd_namespace_label *nd_label)
+{
+       return __le64_to_cpu(nd_label->dpa);
+}
+
+static inline void nsl_set_dpa(struct nvdimm_drvdata *ndd,
+                              struct nd_namespace_label *nd_label, u64 dpa)
+{
+       nd_label->dpa = __cpu_to_le64(dpa);
+}
+
+static inline u64 nsl_get_rawsize(struct nvdimm_drvdata *ndd,
+                                 struct nd_namespace_label *nd_label)
+{
+       return __le64_to_cpu(nd_label->rawsize);
+}
+
+static inline void nsl_set_rawsize(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label,
+                                  u64 rawsize)
+{
+       nd_label->rawsize = __cpu_to_le64(rawsize);
+}
+
+static inline u64 nsl_get_isetcookie(struct nvdimm_drvdata *ndd,
+                                    struct nd_namespace_label *nd_label)
+{
+       return __le64_to_cpu(nd_label->isetcookie);
+}
+
+static inline void nsl_set_isetcookie(struct nvdimm_drvdata *ndd,
+                                     struct nd_namespace_label *nd_label,
+                                     u64 isetcookie)
+{
+       nd_label->isetcookie = __cpu_to_le64(isetcookie);
+}
+
+static inline bool nsl_validate_isetcookie(struct nvdimm_drvdata *ndd,
+                                          struct nd_namespace_label *nd_label,
+                                          u64 cookie)
+{
+       return cookie == __le64_to_cpu(nd_label->isetcookie);
+}
+
+static inline u16 nsl_get_position(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label)
+{
+       return __le16_to_cpu(nd_label->position);
+}
+
+static inline void nsl_set_position(struct nvdimm_drvdata *ndd,
+                                   struct nd_namespace_label *nd_label,
+                                   u16 position)
+{
+       nd_label->position = __cpu_to_le16(position);
+}
+
+
+static inline u16 nsl_get_nlabel(struct nvdimm_drvdata *ndd,
+                                struct nd_namespace_label *nd_label)
+{
+       return __le16_to_cpu(nd_label->nlabel);
+}
+
+static inline void nsl_set_nlabel(struct nvdimm_drvdata *ndd,
+                                 struct nd_namespace_label *nd_label,
+                                 u16 nlabel)
+{
+       nd_label->nlabel = __cpu_to_le16(nlabel);
+}
+
+static inline u64 nsl_get_lbasize(struct nvdimm_drvdata *ndd,
+                                 struct nd_namespace_label *nd_label)
+{
+       return __le64_to_cpu(nd_label->lbasize);
+}
+
+static inline void nsl_set_lbasize(struct nvdimm_drvdata *ndd,
+                                  struct nd_namespace_label *nd_label,
+                                  u64 lbasize)
+{
+       nd_label->lbasize = __cpu_to_le64(lbasize);
+}
+
+bool nsl_validate_blk_isetcookie(struct nvdimm_drvdata *ndd,
+                                struct nd_namespace_label *nd_label,
+                                u64 isetcookie);
+bool nsl_validate_type_guid(struct nvdimm_drvdata *ndd,
+                           struct nd_namespace_label *nd_label, guid_t *guid);
+enum nvdimm_claim_class nsl_get_claim_class(struct nvdimm_drvdata *ndd,
+                                           struct nd_namespace_label *nd_label);
+
 struct nd_region_data {
        int ns_count;
        int ns_active;
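
With the full accessor set in place, the on-media endianness handling stays behind these helpers instead of being repeated at every call site. A rough usage sketch (hypothetical debug helper, not part of the patch), assuming a populated ndd and nd_label:

        /* Hypothetical helper: every field read goes through a cpu-endian accessor. */
        static void nd_label_dbg_dump(struct nvdimm_drvdata *ndd,
                                      struct nd_namespace_label *nd_label)
        {
                dev_dbg(ndd->dev, "slot %u dpa %#llx rawsize %#llx flags %#x\n",
                        nsl_get_slot(ndd, nd_label),
                        nsl_get_dpa(ndd, nd_label),
                        nsl_get_rawsize(ndd, nd_label),
                        nsl_get_flags(ndd, nd_label));
        }
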
index 1e0615b..72de88f 100644 (file)
@@ -450,11 +450,11 @@ static int pmem_attach_disk(struct device *dev,
                pmem->pfn_flags |= PFN_MAP;
                bb_range = pmem->pgmap.range;
        } else {
+               addr = devm_memremap(dev, pmem->phys_addr,
+                               pmem->size, ARCH_MEMREMAP_PMEM);
                if (devm_add_action_or_reset(dev, pmem_release_queue,
                                        &pmem->pgmap))
                        return -ENOMEM;
-               addr = devm_memremap(dev, pmem->phys_addr,
-                               pmem->size, ARCH_MEMREMAP_PMEM);
                bb_range.start =  res->start;
                bb_range.end = res->end;
        }
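
The reordering matters because devm-managed resources are released in reverse order of registration: with devm_memremap() registered first, the pmem_release_queue() action now runs before the mapping is torn down on unbind. Sketch of the resulting unwind order (assuming both calls succeeded):

        /*
         * Device unbind, newest devres entry first:
         *   1. pmem_release_queue(&pmem->pgmap)  - registered last, runs first
         *   2. the devm_memremap() mapping of pmem->phys_addr is unmapped
         */
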
index 376f1ef..d0416db 100644 (file)
@@ -2,17 +2,6 @@
 comment "S/390 block device drivers"
        depends on S390 && BLOCK
 
-config BLK_DEV_XPRAM
-       def_tristate m
-       prompt "XPRAM disk support"
-       depends on S390 && BLOCK
-       help
-         Select this option if you want to use your expanded storage on S/390
-         or zSeries as a disk.  This is useful as a _fast_ swap device if you
-         want to access more than 2G of memory when running in 31 bit mode.
-         This option is also available as a module which will be called
-         xpram.  If unsure, say "N".
-
 config DCSSBLK
        def_tristate m
        select FS_DAX_LIMITED
index 60c85cf..a0a54d2 100644 (file)
@@ -16,7 +16,6 @@ obj-$(CONFIG_DASD) += dasd_mod.o
 obj-$(CONFIG_DASD_DIAG) += dasd_diag_mod.o
 obj-$(CONFIG_DASD_ECKD) += dasd_eckd_mod.o
 obj-$(CONFIG_DASD_FBA)  += dasd_fba_mod.o
-obj-$(CONFIG_BLK_DEV_XPRAM) += xpram.o
 obj-$(CONFIG_DCSSBLK) += dcssblk.o
 
 scm_block-objs := scm_drv.o scm_blk.o
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
deleted file mode 100644 (file)
index ce98fab..0000000
+++ /dev/null
@@ -1,416 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Xpram.c -- the S/390 expanded memory RAM-disk
- *           
- * significant parts of this code are based on
- * the sbull device driver presented in
- * A. Rubini: Linux Device Drivers
- *
- * Author of XPRAM specific coding: Reinhard Buendgen
- *                                  buendgen@de.ibm.com
- * Rewrite for 2.5: Martin Schwidefsky <schwidefsky@de.ibm.com>
- *
- * External interfaces:
- *   Interfaces to linux kernel
- *        xpram_setup: read kernel parameters
- *   Device specific file operations
- *        xpram_iotcl
- *        xpram_open
- *
- * "ad-hoc" partitioning:
- *    the expanded memory can be partitioned among several devices 
- *    (with different minors). The partitioning set up can be
- *    set by kernel or module parameters (int devs & int sizes[])
- *
- * Potential future improvements:
- *   generic hard disk support to replace ad-hoc partitioning
- */
-
-#define KMSG_COMPONENT "xpram"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/ctype.h>  /* isdigit, isxdigit */
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/blkdev.h>
-#include <linux/blkpg.h>
-#include <linux/hdreg.h>  /* HDIO_GETGEO */
-#include <linux/device.h>
-#include <linux/bio.h>
-#include <linux/gfp.h>
-#include <linux/uaccess.h>
-
-#define XPRAM_NAME     "xpram"
-#define XPRAM_DEVS     1       /* one partition */
-#define XPRAM_MAX_DEVS 32      /* maximal number of devices (partitions) */
-
-typedef struct {
-       unsigned int    size;           /* size of xpram segment in pages */
-       unsigned int    offset;         /* start page of xpram segment */
-} xpram_device_t;
-
-static xpram_device_t xpram_devices[XPRAM_MAX_DEVS];
-static unsigned int xpram_sizes[XPRAM_MAX_DEVS];
-static struct gendisk *xpram_disks[XPRAM_MAX_DEVS];
-static unsigned int xpram_pages;
-static int xpram_devs;
-
-/*
- * Parameter parsing functions.
- */
-static int devs = XPRAM_DEVS;
-static char *sizes[XPRAM_MAX_DEVS];
-
-module_param(devs, int, 0);
-module_param_array(sizes, charp, NULL, 0);
-
-MODULE_PARM_DESC(devs, "number of devices (\"partitions\"), " \
-                "the default is " __MODULE_STRING(XPRAM_DEVS) "\n");
-MODULE_PARM_DESC(sizes, "list of device (partition) sizes " \
-                "the defaults are 0s \n" \
-                "All devices with size 0 equally partition the "
-                "remaining space on the expanded strorage not "
-                "claimed by explicit sizes\n");
-MODULE_LICENSE("GPL");
-
-/*
- * Copy expanded memory page (4kB) into main memory                  
- * Arguments                                                         
- *           page_addr:    address of target page                    
- *           xpage_index:  index of expandeded memory page           
- * Return value                                                      
- *           0:            if operation succeeds
- *           -EIO:         if pgin failed
- *           -ENXIO:       if xpram has vanished
- */
-static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index)
-{
-       int cc = 2;     /* return unused cc 2 if pgin traps */
-
-       asm volatile(
-               "       .insn   rre,0xb22e0000,%1,%2\n"  /* pgin %1,%2 */
-               "0:     ipm     %0\n"
-               "       srl     %0,28\n"
-               "1:\n"
-               EX_TABLE(0b,1b)
-               : "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
-       if (cc == 3)
-               return -ENXIO;
-       if (cc == 2)
-               return -ENXIO;
-       if (cc == 1)
-               return -EIO;
-       return 0;
-}
-
-/*
- * Copy a 4kB page of main memory to an expanded memory page          
- * Arguments                                                          
- *           page_addr:    address of source page                     
- *           xpage_index:  index of expandeded memory page            
- * Return value                                                       
- *           0:            if operation succeeds
- *           -EIO:         if pgout failed
- *           -ENXIO:       if xpram has vanished
- */
-static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index)
-{
-       int cc = 2;     /* return unused cc 2 if pgin traps */
-
-       asm volatile(
-               "       .insn   rre,0xb22f0000,%1,%2\n"  /* pgout %1,%2 */
-               "0:     ipm     %0\n"
-               "       srl     %0,28\n"
-               "1:\n"
-               EX_TABLE(0b,1b)
-               : "+d" (cc) : "a" (__pa(page_addr)), "d" (xpage_index) : "cc");
-       if (cc == 3)
-               return -ENXIO;
-       if (cc == 2)
-               return -ENXIO;
-       if (cc == 1)
-               return -EIO;
-       return 0;
-}
-
-/*
- * Check if xpram is available.
- */
-static int __init xpram_present(void)
-{
-       unsigned long mem_page;
-       int rc;
-
-       mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-       if (!mem_page)
-               return -ENOMEM;
-       rc = xpram_page_in(mem_page, 0);
-       free_page(mem_page);
-       return rc ? -ENXIO : 0;
-}
-
-/*
- * Return index of the last available xpram page.
- */
-static unsigned long __init xpram_highest_page_index(void)
-{
-       unsigned int page_index, add_bit;
-       unsigned long mem_page;
-
-       mem_page = (unsigned long) __get_free_page(GFP_KERNEL);
-       if (!mem_page)
-               return 0;
-
-       page_index = 0;
-       add_bit = 1ULL << (sizeof(unsigned int)*8 - 1);
-       while (add_bit > 0) {
-               if (xpram_page_in(mem_page, page_index | add_bit) == 0)
-                       page_index |= add_bit;
-               add_bit >>= 1;
-       }
-
-       free_page (mem_page);
-
-       return page_index;
-}
-
-/*
- * Block device make request function.
- */
-static blk_qc_t xpram_submit_bio(struct bio *bio)
-{
-       xpram_device_t *xdev = bio->bi_bdev->bd_disk->private_data;
-       struct bio_vec bvec;
-       struct bvec_iter iter;
-       unsigned int index;
-       unsigned long page_addr;
-       unsigned long bytes;
-
-       blk_queue_split(&bio);
-
-       if ((bio->bi_iter.bi_sector & 7) != 0 ||
-           (bio->bi_iter.bi_size & 4095) != 0)
-               /* Request is not page-aligned. */
-               goto fail;
-       if ((bio->bi_iter.bi_size >> 12) > xdev->size)
-               /* Request size is no page-aligned. */
-               goto fail;
-       if ((bio->bi_iter.bi_sector >> 3) > 0xffffffffU - xdev->offset)
-               goto fail;
-       index = (bio->bi_iter.bi_sector >> 3) + xdev->offset;
-       bio_for_each_segment(bvec, bio, iter) {
-               page_addr = (unsigned long)
-                       kmap(bvec.bv_page) + bvec.bv_offset;
-               bytes = bvec.bv_len;
-               if ((page_addr & 4095) != 0 || (bytes & 4095) != 0)
-                       /* More paranoia. */
-                       goto fail;
-               while (bytes > 0) {
-                       if (bio_data_dir(bio) == READ) {
-                               if (xpram_page_in(page_addr, index) != 0)
-                                       goto fail;
-                       } else {
-                               if (xpram_page_out(page_addr, index) != 0)
-                                       goto fail;
-                       }
-                       page_addr += 4096;
-                       bytes -= 4096;
-                       index++;
-               }
-       }
-       bio_endio(bio);
-       return BLK_QC_T_NONE;
-fail:
-       bio_io_error(bio);
-       return BLK_QC_T_NONE;
-}
-
-static int xpram_getgeo(struct block_device *bdev, struct hd_geometry *geo)
-{
-       unsigned long size;
-
-       /*
-        * get geometry: we have to fake one...  trim the size to a
-        * multiple of 64 (32k): tell we have 16 sectors, 4 heads,
-        * whatever cylinders. Tell also that data starts at sector. 4.
-        */
-       size = (xpram_pages * 8) & ~0x3f;
-       geo->cylinders = size >> 6;
-       geo->heads = 4;
-       geo->sectors = 16;
-       geo->start = 4;
-       return 0;
-}
-
-static const struct block_device_operations xpram_devops =
-{
-       .owner  = THIS_MODULE,
-       .submit_bio = xpram_submit_bio,
-       .getgeo = xpram_getgeo,
-};
-
-/*
- * Setup xpram_sizes array.
- */
-static int __init xpram_setup_sizes(unsigned long pages)
-{
-       unsigned long mem_needed;
-       unsigned long mem_auto;
-       unsigned long long size;
-       char *sizes_end;
-       int mem_auto_no;
-       int i;
-
-       /* Check number of devices. */
-       if (devs <= 0 || devs > XPRAM_MAX_DEVS) {
-               pr_err("%d is not a valid number of XPRAM devices\n",devs);
-               return -EINVAL;
-       }
-       xpram_devs = devs;
-
-       /*
-        * Copy sizes array to xpram_sizes and align partition
-        * sizes to page boundary.
-        */
-       mem_needed = 0;
-       mem_auto_no = 0;
-       for (i = 0; i < xpram_devs; i++) {
-               if (sizes[i]) {
-                       size = simple_strtoull(sizes[i], &sizes_end, 0);
-                       switch (*sizes_end) {
-                       case 'g':
-                       case 'G':
-                               size <<= 20;
-                               break;
-                       case 'm':
-                       case 'M':
-                               size <<= 10;
-                       }
-                       xpram_sizes[i] = (size + 3) & -4UL;
-               }
-               if (xpram_sizes[i])
-                       mem_needed += xpram_sizes[i];
-               else
-                       mem_auto_no++;
-       }
-       
-       pr_info("  number of devices (partitions): %d \n", xpram_devs);
-       for (i = 0; i < xpram_devs; i++) {
-               if (xpram_sizes[i])
-                       pr_info("  size of partition %d: %u kB\n",
-                               i, xpram_sizes[i]);
-               else
-                       pr_info("  size of partition %d to be set "
-                               "automatically\n",i);
-       }
-       pr_info("  memory needed (for sized partitions): %lu kB\n",
-               mem_needed);
-       pr_info("  partitions to be sized automatically: %d\n",
-               mem_auto_no);
-
-       if (mem_needed > pages * 4) {
-               pr_err("Not enough expanded memory available\n");
-               return -EINVAL;
-       }
-
-       /*
-        * partitioning:
-        * xpram_sizes[i] != 0; partition i has size xpram_sizes[i] kB
-        * else:             ; all partitions with zero xpram_sizes[i]
-        *                     partition equally the remaining space
-        */
-       if (mem_auto_no) {
-               mem_auto = ((pages - mem_needed / 4) / mem_auto_no) * 4;
-               pr_info("  automatically determined "
-                       "partition size: %lu kB\n", mem_auto);
-               for (i = 0; i < xpram_devs; i++)
-                       if (xpram_sizes[i] == 0)
-                               xpram_sizes[i] = mem_auto;
-       }
-       return 0;
-}
-
-static int __init xpram_setup_blkdev(void)
-{
-       unsigned long offset;
-       int i, rc = -ENOMEM;
-
-       for (i = 0; i < xpram_devs; i++) {
-               xpram_disks[i] = blk_alloc_disk(NUMA_NO_NODE);
-               if (!xpram_disks[i])
-                       goto out;
-               blk_queue_flag_set(QUEUE_FLAG_NONROT, xpram_disks[i]->queue);
-               blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM,
-                               xpram_disks[i]->queue);
-               blk_queue_logical_block_size(xpram_disks[i]->queue, 4096);
-       }
-
-       /*
-        * Register xpram major.
-        */
-       rc = register_blkdev(XPRAM_MAJOR, XPRAM_NAME);
-       if (rc < 0)
-               goto out;
-
-       /*
-        * Setup device structures.
-        */
-       offset = 0;
-       for (i = 0; i < xpram_devs; i++) {
-               struct gendisk *disk = xpram_disks[i];
-
-               xpram_devices[i].size = xpram_sizes[i] / 4;
-               xpram_devices[i].offset = offset;
-               offset += xpram_devices[i].size;
-               disk->major = XPRAM_MAJOR;
-               disk->first_minor = i;
-               disk->minors = 1;
-               disk->fops = &xpram_devops;
-               disk->private_data = &xpram_devices[i];
-               sprintf(disk->disk_name, "slram%d", i);
-               set_capacity(disk, xpram_sizes[i] << 1);
-               add_disk(disk);
-       }
-
-       return 0;
-out:
-       while (i--)
-               blk_cleanup_disk(xpram_disks[i]);
-       return rc;
-}
-
-/*
- * Finally, the init/exit functions.
- */
-static void __exit xpram_exit(void)
-{
-       int i;
-       for (i = 0; i < xpram_devs; i++) {
-               del_gendisk(xpram_disks[i]);
-               blk_cleanup_disk(xpram_disks[i]);
-       }
-       unregister_blkdev(XPRAM_MAJOR, XPRAM_NAME);
-}
-
-static int __init xpram_init(void)
-{
-       int rc;
-
-       /* Find out size of expanded memory. */
-       if (xpram_present() != 0) {
-               pr_err("No expanded memory available\n");
-               return -ENODEV;
-       }
-       xpram_pages = xpram_highest_page_index() + 1;
-       pr_info("  %u pages expanded memory found (%lu KB).\n",
-               xpram_pages, (unsigned long) xpram_pages*4);
-       rc = xpram_setup_sizes(xpram_pages);
-       if (rc)
-               return rc;
-       return xpram_setup_blkdev();
-}
-
-module_init(xpram_init);
-module_exit(xpram_exit);
index 87cdbac..e459289 100644 (file)
@@ -292,13 +292,15 @@ con3270_update(struct timer_list *t)
  * Read tasklet.
  */
 static void
-con3270_read_tasklet(struct raw3270_request *rrq)
+con3270_read_tasklet(unsigned long data)
 {
        static char kreset_data = TW_KR;
+       struct raw3270_request *rrq;
        struct con3270 *cp;
        unsigned long flags;
        int nr_up, deactivate;
 
+       rrq = (struct raw3270_request *)data;
        cp = (struct con3270 *) rrq->view;
        spin_lock_irqsave(&cp->view.lock, flags);
        nr_up = cp->nr_up;
@@ -625,8 +627,7 @@ con3270_init(void)
        INIT_LIST_HEAD(&condev->lines);
        INIT_LIST_HEAD(&condev->update);
        timer_setup(&condev->timer, con3270_update, 0);
-       tasklet_init(&condev->readlet, 
-                    (void (*)(unsigned long)) con3270_read_tasklet,
+       tasklet_init(&condev->readlet, con3270_read_tasklet,
                     (unsigned long) condev->read);
 
        raw3270_add_view(&condev->view, &con3270_fn, 1, RAW3270_VIEW_LOCK_IRQ);
index e1686a6..6f2b640 100644 (file)
@@ -34,12 +34,13 @@ void schedule_sysrq_work(struct sysrq_work *sw)
 
 
 /**
- * Check for special chars at start of input.
+ * ctrlchar_handle - check for special chars at start of input
  *
- * @param buf Console input buffer.
- * @param len Length of valid data in buffer.
- * @param tty The tty struct for this console.
- * @return CTRLCHAR_NONE, if nothing matched,
+ * @buf: console input buffer
+ * @len: length of valid data in buffer
+ * @tty: the tty struct for this console
+ *
+ * Return: CTRLCHAR_NONE, if nothing matched,
  *         CTRLCHAR_SYSRQ, if sysrq was encountered
  *         otherwise char to be inserted logically or'ed
  *         with CTRLCHAR_CTRL
index 37ee8f6..02b6f39 100644 (file)
@@ -26,7 +26,7 @@
  * struct hmcdrv_ftp_ops - HMC drive FTP operations
  * @startup: startup function
  * @shutdown: shutdown function
- * @cmd: FTP transfer function
+ * @transfer: FTP transfer function
  */
 struct hmcdrv_ftp_ops {
        int (*startup)(void);
index b4b84e3..2cf7fe1 100644 (file)
@@ -28,7 +28,7 @@
 #define SCLP_HEADER            "sclp: "
 
 struct sclp_trace_entry {
-       char id[4];
+       char id[4] __nonstring;
        u32 a;
        u64 b;
 };
index 4dd2eb6..f3c6569 100644 (file)
@@ -262,7 +262,10 @@ static int blacklist_parse_proc_parameters(char *buf)
 
        if (strcmp("free", parm) == 0) {
                rc = blacklist_parse_parameters(buf, free, 0);
-               css_schedule_eval_all_unreg(0);
+               /* There could be subchannels without proper devices connected.
+                * Evaluate all the entries.
+                */
+               css_schedule_eval_all();
        } else if (strcmp("add", parm) == 0)
                rc = blacklist_parse_parameters(buf, add, 0);
        else if (strcmp("purge", parm) == 0)
index adf33b6..8d14569 100644 (file)
@@ -867,19 +867,6 @@ out_err:
                wake_up(&ccw_device_init_wq);
 }
 
-static void ccw_device_call_sch_unregister(struct ccw_device *cdev)
-{
-       struct subchannel *sch;
-
-       /* Get subchannel reference for local processing. */
-       if (!get_device(cdev->dev.parent))
-               return;
-       sch = to_subchannel(cdev->dev.parent);
-       css_sch_device_unregister(sch);
-       /* Release subchannel reference for local processing. */
-       put_device(&sch->dev);
-}
-
 /*
  * subchannel recognition done. Called from the state machine.
  */
@@ -1857,10 +1844,10 @@ static void ccw_device_todo(struct work_struct *work)
                        css_schedule_eval(sch->schid);
                fallthrough;
        case CDEV_TODO_UNREG:
-               if (sch_is_pseudo_sch(sch))
-                       ccw_device_unregister(cdev);
-               else
-                       ccw_device_call_sch_unregister(cdev);
+               spin_lock_irq(sch->lock);
+               sch_set_cdev(sch, NULL);
+               spin_unlock_irq(sch->lock);
+               ccw_device_unregister(cdev);
                break;
        default:
                break;
index 740996d..7835a87 100644 (file)
@@ -91,7 +91,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
 }
 
 /**
- * diag_get_dev_info - retrieve device information via diag 0x210
+ * diag210_get_dev_info - retrieve device information via diag 0x210
  * @cdev: ccw device
  *
  * Returns zero on success, non-zero otherwise.
index fa0cb86..3563187 100644 (file)
@@ -71,7 +71,7 @@ static LIST_HEAD(zcrypt_ops_list);
 /* Zcrypt related debug feature stuff. */
 debug_info_t *zcrypt_dbf_info;
 
-/**
+/*
  * Process a rescan of the transport layer.
  *
  * Returns 1, if the rescan has been processed, otherwise 0.
@@ -462,7 +462,7 @@ static void zcdn_destroy_all(void)
 
 #endif
 
-/**
+/*
  * zcrypt_read (): Not supported beyond zcrypt 1.3.1.
  *
  * This function is not supported beyond zcrypt 1.3.1.
@@ -473,7 +473,7 @@ static ssize_t zcrypt_read(struct file *filp, char __user *buf,
        return -EPERM;
 }
 
-/**
+/*
  * zcrypt_write(): Not allowed.
  *
  * Write is not allowed
@@ -484,7 +484,7 @@ static ssize_t zcrypt_write(struct file *filp, const char __user *buf,
        return -EPERM;
 }
 
-/**
+/*
  * zcrypt_open(): Count number of users.
  *
  * Device open function to count number of users.
@@ -512,7 +512,7 @@ static int zcrypt_open(struct inode *inode, struct file *filp)
        return stream_open(inode, filp);
 }
 
-/**
+/*
  * zcrypt_release(): Count number of users.
  *
  * Device close function to count number of users.
@@ -2153,7 +2153,7 @@ static void zcdn_exit(void)
 
 #endif
 
-/**
+/*
  * zcrypt_api_init(): Module initialization.
  *
  * The module initialization code.
@@ -2191,7 +2191,7 @@ out:
        return rc;
 }
 
-/**
+/*
  * zcrypt_api_exit(): Module termination.
  *
  * The module termination code.
index fa8293d..2bd4995 100644 (file)
@@ -65,7 +65,7 @@ static struct ap_device_id zcrypt_cex2a_queue_ids[] = {
 
 MODULE_DEVICE_TABLE(ap, zcrypt_cex2a_queue_ids);
 
-/**
+/*
  * Probe function for CEX2A card devices. It always accepts the AP device
  * since the bus_match already checked the card type.
  * @ap_dev: pointer to the AP device.
@@ -124,7 +124,7 @@ static int zcrypt_cex2a_card_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2A card driver information
  * if an AP card device is removed.
  */
@@ -142,7 +142,7 @@ static struct ap_driver zcrypt_cex2a_card_driver = {
        .flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX2A queue devices. It always accepts the AP device
  * since the bus_match already checked the queue type.
  * @ap_dev: pointer to the AP device.
@@ -183,7 +183,7 @@ static int zcrypt_cex2a_queue_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2A queue driver information
  * if an AP queue device is removed.
  */
index a0b9f11..6360fdd 100644 (file)
@@ -171,7 +171,7 @@ static const struct attribute_group cca_queue_attr_grp = {
        .attrs = cca_queue_attrs,
 };
 
-/**
+/*
  * Large random number detection function. It sends a message to a CEX2C/CEX3C
  * card to find out if large random numbers are supported.
  * @ap_dev: pointer to the AP device.
@@ -237,7 +237,7 @@ out_free:
        return rc;
 }
 
-/**
+/*
  * Probe function for CEX2C/CEX3C card devices. It always accepts the
  * AP device since the bus_match already checked the hardware type.
  * @ap_dev: pointer to the AP card device.
@@ -303,7 +303,7 @@ static int zcrypt_cex2c_card_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2C/CEX3C card driver information
  * if an AP card device is removed.
  */
@@ -325,7 +325,7 @@ static struct ap_driver zcrypt_cex2c_card_driver = {
        .flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX2C/CEX3C queue devices. It always accepts the
  * AP device since the bus_match already checked the hardware type.
  * @ap_dev: pointer to the AP card device.
@@ -376,7 +376,7 @@ static int zcrypt_cex2c_queue_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX2C/CEX3C queue driver information
  * if an AP queue device is removed.
  */
index 1f7ec54..06024bb 100644 (file)
@@ -394,7 +394,7 @@ static const struct attribute_group ep11_queue_attr_grp = {
        .attrs = ep11_queue_attrs,
 };
 
-/**
+/*
  * Probe function for CEX4/CEX5/CEX6/CEX7 card device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
@@ -562,7 +562,7 @@ static int zcrypt_cex4_card_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX4/CEX5/CEX6/CEX7 card driver
  * information if an AP card device is removed.
  */
@@ -586,7 +586,7 @@ static struct ap_driver zcrypt_cex4_card_driver = {
        .flags = AP_DRIVER_FLAG_DEFAULT,
 };
 
-/**
+/*
  * Probe function for CEX4/CEX5/CEX6/CEX7 queue device. It always
  * accepts the AP device since the bus_match already checked
  * the hardware type.
@@ -652,7 +652,7 @@ static int zcrypt_cex4_queue_probe(struct ap_device *ap_dev)
        return rc;
 }
 
-/**
+/*
  * This is called to remove the CEX4/CEX5/CEX6/CEX7 queue driver
  * information if an AP queue device is removed.
  */
index 9940547..99937f3 100644 (file)
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Cryptographic Accelerator (message type 50), " \
                   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-/**
+/*
  * The type 50 message family is associated with a CEXxA cards.
  *
  * The four members of the family are described below.
@@ -136,7 +136,7 @@ struct type50_crb3_msg {
        unsigned char   message[512];
 } __packed;
 
-/**
+/*
  * The type 80 response family is associated with a CEXxA cards.
  *
  * Note that all unsigned char arrays are right-justified and left-padded
@@ -188,7 +188,7 @@ unsigned int get_rsa_crt_fc(struct ica_rsa_modexpo_crt *crt, int *fcode)
        return 0;
 }
 
-/**
+/*
  * Convert a ICAMEX message to a type50 MEX message.
  *
  * @zq: crypto queue pointer
@@ -255,7 +255,7 @@ static int ICAMEX_msg_to_type50MEX_msg(struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Convert a ICACRT message to a type50 CRT message.
  *
  * @zq: crypto queue pointer
@@ -346,7 +346,7 @@ static int ICACRT_msg_to_type50CRT_msg(struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Copy results from a type 80 reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -418,7 +418,7 @@ static int convert_response_cex2a(struct zcrypt_queue *zq,
        }
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -457,7 +457,7 @@ out:
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxA
  * device to handle a modexpo request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -502,7 +502,7 @@ out:
        return rc;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxA
  * device to handle a modexpo_crt request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -547,7 +547,7 @@ out:
        return rc;
 }
 
-/**
+/*
  * The crypto operations for message type 50.
  */
 static struct zcrypt_ops zcrypt_msgtype50_ops = {
index 752c639..bc5a8c3 100644 (file)
@@ -44,7 +44,7 @@ MODULE_DESCRIPTION("Cryptographic Coprocessor (message type 6), " \
                   "Copyright IBM Corp. 2001, 2012");
 MODULE_LICENSE("GPL");
 
-/**
+/*
  * CPRB
  *       Note that all shorts, ints and longs are little-endian.
  *       All pointer fields are 32-bits long, and mean nothing
@@ -107,7 +107,7 @@ struct function_and_rules_block {
        unsigned char only_rule[8];
 } __packed;
 
-/**
+/*
  * The following is used to initialize the CPRBX passed to the CEXxC/CEXxP
  * card in a type6 message. The 3 fields that must be filled in at execution
  * time are  req_parml, rpl_parml and usage_domain.
@@ -236,7 +236,7 @@ int speed_idx_ep11(int req_type)
 }
 
 
-/**
+/*
  * Convert a ICAMEX message to a type6 MEX message.
  *
  * @zq: crypto device pointer
@@ -305,7 +305,7 @@ static int ICAMEX_msg_to_type6MEX_msgX(struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Convert a ICACRT message to a type6 CRT message.
  *
  * @zq: crypto device pointer
@@ -374,7 +374,7 @@ static int ICACRT_msg_to_type6CRT_msgX(struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Convert a XCRB message to a type6 CPRB message.
  *
  * @zq: crypto device pointer
@@ -571,7 +571,7 @@ static int xcrb_msg_to_type6_ep11cprb_msgx(bool userspace, struct ap_message *ap
        return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 ICA reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -697,7 +697,7 @@ static int convert_type86_ica(struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 XCRB reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -728,7 +728,7 @@ static int convert_type86_xcrb(bool userspace, struct zcrypt_queue *zq,
        return 0;
 }
 
-/**
+/*
  * Copy results from a type 86 EP11 XCRB reply message back to user space.
  *
  * @zq: crypto device pointer
@@ -911,7 +911,7 @@ static int convert_response_rng(struct zcrypt_queue *zq,
        }
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -966,7 +966,7 @@ out:
        complete(&(resp_type->work));
 }
 
-/**
+/*
  * This function is called from the AP bus code after a crypto request
  * "msg" has finished with the reply message "reply".
  * It is called from tasklet context.
@@ -1015,7 +1015,7 @@ out:
 
 static atomic_t zcrypt_step = ATOMIC_INIT(0);
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a modexpo request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1063,7 +1063,7 @@ out_free:
        return rc;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a modexpo_crt request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1112,7 +1112,7 @@ out_free:
        return rc;
 }
 
-/**
+/*
  * Fetch function code from cprb.
  * Extracting the fc requires to copy the cprb from userspace.
  * So this function allocates memory and needs an ap_msg prepared
@@ -1140,7 +1140,7 @@ unsigned int get_cprb_fc(bool userspace, struct ica_xcRB *xcRB,
        return XCRB_msg_to_type6CPRB_msgX(userspace, ap_msg, xcRB, func_code, dom);
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to handle a send_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1170,7 +1170,7 @@ out:
        return rc;
 }
 
-/**
+/*
  * Fetch function code from ep11 cprb.
  * Extracting the fc requires to copy the ep11 cprb from userspace.
  * So this function allocates memory and needs an ap_msg prepared
@@ -1198,7 +1198,7 @@ unsigned int get_ep11cprb_fc(bool userspace, struct ep11_urb *xcrb,
        return xcrb_msg_to_type6_ep11cprb_msgx(userspace, ap_msg, xcrb, func_code);
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEX4P
  * device to handle a send_ep11_cprb request.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1228,7 +1228,7 @@ static long zcrypt_msgtype6_send_ep11_cprb(bool userspace, struct zcrypt_queue *
        } __packed * payload_hdr = NULL;
 
 
-       /**
+       /*
         * The target domain field within the cprb body/payload block will be
         * replaced by the usage domain for non-management commands only.
         * Therefore we check the first bit of the 'flags' parameter for
@@ -1299,7 +1299,7 @@ unsigned int get_rng_fc(struct ap_message *ap_msg, int *func_code,
        return 0;
 }
 
-/**
+/*
  * The request distributor calls this function if it picked the CEXxC
  * device to generate random data.
  * @zq: pointer to zcrypt_queue structure that identifies the
@@ -1339,7 +1339,7 @@ out:
        return rc;
 }
 
-/**
+/*
  * The crypto operations for a CEXxC card.
  */
 static struct zcrypt_ops zcrypt_msgtype6_norng_ops = {
index ca473b3..cbc3b62 100644 (file)
@@ -766,7 +766,7 @@ static void zfcp_dbf_unregister(struct zfcp_dbf *dbf)
 }
 
 /**
- * zfcp_adapter_debug_register - registers debug feature for an adapter
+ * zfcp_dbf_adapter_register - registers debug feature for an adapter
  * @adapter: pointer to adapter for which debug features should be registered
  * return: -ENOMEM on error, 0 otherwise
  */
@@ -824,7 +824,7 @@ err_out:
 }
 
 /**
- * zfcp_adapter_debug_unregister - unregisters debug feature for an adapter
+ * zfcp_dbf_adapter_unregister - unregisters debug feature for an adapter
  * @adapter: pointer to adapter for which debug features should be unregistered
  */
 void zfcp_dbf_adapter_unregister(struct zfcp_adapter *adapter)
index 6da8f6d..c1f9792 100644 (file)
@@ -2275,7 +2275,7 @@ static void zfcp_fsf_close_lun_handler(struct zfcp_fsf_req *req)
 }
 
 /**
- * zfcp_fsf_close_LUN - close LUN
+ * zfcp_fsf_close_lun - close LUN
  * @erp_action: pointer to erp_action triggering the "close LUN"
  * Returns: 0 on success, error otherwise
  */
index 8f19bed..6a27201 100644 (file)
@@ -384,7 +384,7 @@ free_req_q:
 }
 
 /**
- * zfcp_close_qdio - close qdio queues for an adapter
+ * zfcp_qdio_close - close qdio queues for an adapter
  * @qdio: pointer to structure zfcp_qdio
  */
 void zfcp_qdio_close(struct zfcp_qdio *qdio)
index 59333f0..60f2a04 100644 (file)
@@ -111,9 +111,9 @@ static void zfcp_unit_release(struct device *dev)
 }
 
 /**
- * zfcp_unit_enqueue - enqueue unit to unit list of a port.
+ * zfcp_unit_add - add unit to unit list of a port.
  * @port: pointer to port where unit is added
- * @fcp_lun: FCP LUN of unit to be enqueued
+ * @fcp_lun: FCP LUN of unit to be added
  * Returns: 0 success
  *
  * Sets up some unit internal structures and creates sysfs entry.
index 71fb710..7420d2c 100644 (file)
@@ -962,6 +962,7 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
        struct fb_var_screeninfo old_var;
        struct fb_videomode mode;
        struct fb_event event;
+       u32 unused;
 
        if (var->activate & FB_ACTIVATE_INV_MODE) {
                struct fb_videomode mode1, mode2;
@@ -1008,6 +1009,11 @@ fb_set_var(struct fb_info *info, struct fb_var_screeninfo *var)
        if (var->xres < 8 || var->yres < 8)
                return -EINVAL;
 
+       /* Reject resolutions so large that the multiplications below would overflow. */
+       if (check_mul_overflow(var->xres, var->yres, &unused) ||
+           check_mul_overflow(var->xres_virtual, var->yres_virtual, &unused))
+               return -EINVAL;
+
        ret = info->fbops->fb_check_var(var, info);
 
        if (ret)
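
check_mul_overflow() comes from <linux/overflow.h>: it stores the product in its third argument and returns true when the mathematical result does not fit that type, which is why a throwaway u32 destination is enough here. A standalone illustration with made-up values (not from the patch):

        bool overflowed;
        u32 out;

        overflowed = check_mul_overflow(65535U, 65535U, &out); /* false: 0xfffe0001 fits in a u32 */
        overflowed = check_mul_overflow(65536U, 65536U, &out); /* true: 2^32 wraps, out becomes 0 */
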
index d8207a1..47af46a 100644 (file)
@@ -43,7 +43,7 @@ source "fs/f2fs/Kconfig"
 source "fs/zonefs/Kconfig"
 
 config FS_DAX
-       bool "Direct Access (DAX) support"
+       bool "File system based Direct Access (DAX) support"
        depends on MMU
        depends on !(ARM || MIPS || SPARC)
        select DEV_PAGEMAP_OPS if (ZONE_DEVICE && !FS_DAX_LIMITED)
@@ -53,8 +53,23 @@ config FS_DAX
          Direct Access (DAX) can be used on memory-backed block devices.
          If the block device supports DAX and the filesystem supports DAX,
          then you can avoid using the pagecache to buffer I/Os.  Turning
-         on this option will compile in support for DAX; you will need to
-         mount the filesystem using the -o dax option.
+         on this option will compile in support for DAX.
+
+         For a DAX device to support file system access it needs to have
+         struct pages.  For the nfit based NVDIMMs this can be enabled
+         using the ndctl utility:
+
+               # ndctl create-namespace --force --reconfig=namespace0.0 \
+                       --mode=fsdax --map=mem
+
+         See the 'create-namespace' man page for details on the overhead of
+         --map=mem:
+         https://docs.pmem.io/ndctl-user-guide/ndctl-man-pages/ndctl-create-namespace
+
+         For ndctl to work, CONFIG_DEV_DAX needs to be enabled as well. For most
+         file systems DAX support needs to be manually enabled globally or
+         per-inode using a mount option as well.  See the file documentation in
+         Documentation/filesystems/dax.rst for details.
 
          If you do not have a block device that is capable of using this,
          or if unsure, say N.  Saying Y will increase the size of the kernel
@@ -219,8 +234,7 @@ config ARCH_SUPPORTS_HUGETLBFS
 
 config HUGETLBFS
        bool "HugeTLB file system support"
-       depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
-                  ARCH_SUPPORTS_HUGETLBFS || BROKEN
+       depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN
        help
          hugetlbfs is a filesystem backing for HugeTLB pages, based on
          ramfs. For architectures that support it, say Y here and read
index 87ef39d..473d21b 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -249,6 +249,34 @@ void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
 }
 EXPORT_SYMBOL(setattr_copy);
 
+int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+               unsigned int ia_valid)
+{
+       int error;
+
+       if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
+               if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+                       return -EPERM;
+       }
+
+       /*
+        * If utimes(2) and friends are called with times == NULL (or both
+        * times are UTIME_NOW), then we need to check for write permission
+        */
+       if (ia_valid & ATTR_TOUCH) {
+               if (IS_IMMUTABLE(inode))
+                       return -EPERM;
+
+               if (!inode_owner_or_capable(mnt_userns, inode)) {
+                       error = inode_permission(mnt_userns, inode, MAY_WRITE);
+                       if (error)
+                               return error;
+               }
+       }
+       return 0;
+}
+EXPORT_SYMBOL(may_setattr);
+
 /**
  * notify_change - modify attributes of a filesystem object
  * @mnt_userns:        user namespace of the mount the inode was found from
@@ -290,25 +318,9 @@ int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
 
        WARN_ON_ONCE(!inode_is_locked(inode));
 
-       if (ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID | ATTR_TIMES_SET)) {
-               if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
-                       return -EPERM;
-       }
-
-       /*
-        * If utimes(2) and friends are called with times == NULL (or both
-        * times are UTIME_NOW), then we need to check for write permission
-        */
-       if (ia_valid & ATTR_TOUCH) {
-               if (IS_IMMUTABLE(inode))
-                       return -EPERM;
-
-               if (!inode_owner_or_capable(mnt_userns, inode)) {
-                       error = inode_permission(mnt_userns, inode, MAY_WRITE);
-                       if (error)
-                               return error;
-               }
-       }
+       error = may_setattr(mnt_userns, inode, ia_valid);
+       if (error)
+               return error;
 
        if ((ia_valid & ATTR_MODE)) {
                umode_t amode = attr->ia_mode;
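
With may_setattr() exported, a filesystem's ->setattr method can run the same ownership/immutability checks early, before taking its own locks, and then fall through to the usual setattr_prepare()/setattr_copy() sequence; the gfs2 hunk later in this series does exactly that. A hedged sketch of such a method (the foofs_* name is hypothetical and error handling is abbreviated):

/* Hypothetical ->setattr using the newly exported may_setattr(). */
static int foofs_setattr(struct user_namespace *mnt_userns,
                         struct dentry *dentry, struct iattr *attr)
{
        struct inode *inode = d_inode(dentry);
        int error;

        /* Same permission checks notify_change() now performs up front. */
        error = may_setattr(mnt_userns, inode, attr->ia_valid);
        if (error)
                return error;

        error = setattr_prepare(mnt_userns, dentry, attr);
        if (error)
                return error;

        setattr_copy(mnt_userns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
}
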
index 2f9515d..355ea88 100644 (file)
@@ -3314,6 +3314,30 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
         */
        fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
 
+       /*
+        * Flag our filesystem as having big metadata blocks if they are bigger
+        * than the page size.
+        */
+       if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+               if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+                       btrfs_info(fs_info,
+                               "flagging fs with big metadata feature");
+               features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+       }
+
+       /* Set up fs_info before parsing mount options */
+       nodesize = btrfs_super_nodesize(disk_super);
+       sectorsize = btrfs_super_sectorsize(disk_super);
+       stripesize = sectorsize;
+       fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
+       fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
+
+       fs_info->nodesize = nodesize;
+       fs_info->sectorsize = sectorsize;
+       fs_info->sectorsize_bits = ilog2(sectorsize);
+       fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
+       fs_info->stripesize = stripesize;
+
        ret = btrfs_parse_options(fs_info, options, sb->s_flags);
        if (ret) {
                err = ret;
@@ -3340,30 +3364,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
        if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
                btrfs_info(fs_info, "has skinny extents");
 
-       /*
-        * flag our filesystem as having big metadata blocks if
-        * they are bigger than the page size
-        */
-       if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
-               if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
-                       btrfs_info(fs_info,
-                               "flagging fs with big metadata feature");
-               features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
-       }
-
-       nodesize = btrfs_super_nodesize(disk_super);
-       sectorsize = btrfs_super_sectorsize(disk_super);
-       stripesize = sectorsize;
-       fs_info->dirty_metadata_batch = nodesize * (1 + ilog2(nr_cpu_ids));
-       fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
-
-       /* Cache block sizes */
-       fs_info->nodesize = nodesize;
-       fs_info->sectorsize = sectorsize;
-       fs_info->sectorsize_bits = ilog2(sectorsize);
-       fs_info->csums_per_leaf = BTRFS_MAX_ITEM_SIZE(fs_info) / fs_info->csum_size;
-       fs_info->stripesize = stripesize;
-
        /*
         * mixed block groups end up with duplicate but slightly offset
         * extent buffers for the same range.  It leads to corruptions
index 41524f9..cc61813 100644 (file)
@@ -3223,6 +3223,8 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_ioctl_vol_args_v2 *vol_args;
+       struct block_device *bdev = NULL;
+       fmode_t mode;
        int ret;
        bool cancel = false;
 
@@ -3255,9 +3257,9 @@ static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
        /* Exclusive operation is now claimed */
 
        if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
-               ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
+               ret = btrfs_rm_device(fs_info, NULL, vol_args->devid, &bdev, &mode);
        else
-               ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+               ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
 
        btrfs_exclop_finish(fs_info);
 
@@ -3273,6 +3275,8 @@ out:
        kfree(vol_args);
 err_drop:
        mnt_drop_write_file(file);
+       if (bdev)
+               blkdev_put(bdev, mode);
        return ret;
 }
 
@@ -3281,6 +3285,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
        struct inode *inode = file_inode(file);
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        struct btrfs_ioctl_vol_args *vol_args;
+       struct block_device *bdev = NULL;
+       fmode_t mode;
        int ret;
        bool cancel;
 
@@ -3302,7 +3308,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
        ret = exclop_start_or_cancel_reloc(fs_info, BTRFS_EXCLOP_DEV_REMOVE,
                                           cancel);
        if (ret == 0) {
-               ret = btrfs_rm_device(fs_info, vol_args->name, 0);
+               ret = btrfs_rm_device(fs_info, vol_args->name, 0, &bdev, &mode);
                if (!ret)
                        btrfs_info(fs_info, "disk deleted %s", vol_args->name);
                btrfs_exclop_finish(fs_info);
@@ -3311,7 +3317,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
        kfree(vol_args);
 out_drop_write:
        mnt_drop_write_file(file);
-
+       if (bdev)
+               blkdev_put(bdev, mode);
        return ret;
 }
 
index 6461ebc..340f995 100644 (file)
@@ -5,7 +5,7 @@
 
 #include <linux/sched.h>
 #include <linux/wait.h>
-#include <asm/div64.h>
+#include <linux/math64.h>
 #include <linux/rbtree.h>
 
 #define in_range(b, first, len) ((b) >= (first) && (b) < (first) + (len))
index edb65ab..6b51fd2 100644 (file)
@@ -1049,6 +1049,7 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
                                u64 len)
 {
        struct inode *inode = ordered->inode;
+       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
        u64 file_offset = ordered->file_offset + pos;
        u64 disk_bytenr = ordered->disk_bytenr + pos;
        u64 num_bytes = len;
@@ -1066,6 +1067,13 @@ static int clone_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pos,
        else
                type = __ffs(flags_masked);
 
+       /*
+        * The splitting extent is already counted and will be added again
+        * in btrfs_add_ordered_extent_*(). Subtract num_bytes to avoid
+        * double counting.
+        */
+       percpu_counter_add_batch(&fs_info->ordered_bytes, -num_bytes,
+                                fs_info->delalloc_batch);
        if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered->flags)) {
                WARN_ON_ONCE(1);
                ret = btrfs_add_ordered_extent_compress(BTRFS_I(inode),
index ec3a874..464485a 100644 (file)
@@ -558,6 +558,8 @@ static int btrfs_free_stale_devices(const char *path,
        struct btrfs_device *device, *tmp_device;
        int ret = 0;
 
+       lockdep_assert_held(&uuid_mutex);
+
        if (path)
                ret = -ENOENT;
 
@@ -988,11 +990,12 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        struct btrfs_device *orig_dev;
        int ret = 0;
 
+       lockdep_assert_held(&uuid_mutex);
+
        fs_devices = alloc_fs_devices(orig->fsid, NULL);
        if (IS_ERR(fs_devices))
                return fs_devices;
 
-       mutex_lock(&orig->device_list_mutex);
        fs_devices->total_devices = orig->total_devices;
 
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
@@ -1024,10 +1027,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
        }
-       mutex_unlock(&orig->device_list_mutex);
        return fs_devices;
 error:
-       mutex_unlock(&orig->device_list_mutex);
        free_fs_devices(fs_devices);
        return ERR_PTR(ret);
 }
@@ -1869,15 +1870,17 @@ out:
  * Function to update ctime/mtime for a given device path.
  * Mainly used for ctime/mtime based probe like libblkid.
  */
-static void update_dev_time(const char *path_name)
+static void update_dev_time(struct block_device *bdev)
 {
-       struct file *filp;
+       struct inode *inode = bdev->bd_inode;
+       struct timespec64 now;
 
-       filp = filp_open(path_name, O_RDWR, 0);
-       if (IS_ERR(filp))
+       /* Shouldn't happen but just in case. */
+       if (!inode)
                return;
-       file_update_time(filp);
-       filp_close(filp, NULL);
+
+       now = current_time(inode);
+       generic_update_time(inode, &now, S_MTIME | S_CTIME);
 }
 
 static int btrfs_rm_dev_item(struct btrfs_device *device)
@@ -2053,11 +2056,11 @@ void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info,
        btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
 
        /* Update ctime/mtime for device path for libblkid */
-       update_dev_time(device_path);
+       update_dev_time(bdev);
 }
 
 int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
-                   u64 devid)
+                   u64 devid, struct block_device **bdev, fmode_t *mode)
 {
        struct btrfs_device *device;
        struct btrfs_fs_devices *cur_devices;
@@ -2171,15 +2174,26 @@ int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
        mutex_unlock(&fs_devices->device_list_mutex);
 
        /*
-        * at this point, the device is zero sized and detached from
-        * the devices list.  All that's left is to zero out the old
-        * supers and free the device.
+        * At this point, the device is zero sized and detached from the
+        * devices list.  All that's left is to zero out the old supers and
+        * free the device.
+        *
+        * We cannot call btrfs_close_bdev() here because we're holding the sb
+        * write lock, and blkdev_put() will pull in the ->open_mutex on the
+        * block device and its dependencies.  Instead just flush the device
+        * and let the caller do the final blkdev_put.
         */
-       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+       if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
                btrfs_scratch_superblocks(fs_info, device->bdev,
                                          device->name->str);
+               if (device->bdev) {
+                       sync_blockdev(device->bdev);
+                       invalidate_bdev(device->bdev);
+               }
+       }
 
-       btrfs_close_bdev(device);
+       *bdev = device->bdev;
+       *mode = device->mode;
        synchronize_rcu();
        btrfs_free_device(device);
 
@@ -2706,7 +2720,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
        btrfs_forget_devices(device_path);
 
        /* Update ctime/mtime for blkid or udev */
-       update_dev_time(device_path);
+       update_dev_time(bdev);
 
        return ret;
 
index b082250..2183361 100644 (file)
@@ -472,7 +472,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
                                        const u8 *uuid);
 void btrfs_free_device(struct btrfs_device *device);
 int btrfs_rm_device(struct btrfs_fs_info *fs_info,
-                   const char *device_path, u64 devid);
+                   const char *device_path, u64 devid,
+                   struct block_device **bdev, fmode_t *mode);
 void __exit btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
 int btrfs_grow_device(struct btrfs_trans_handle *trans,
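
With the extra out parameters, btrfs_rm_device() hands the removed device's block_device and open mode back to the caller, which performs the final blkdev_put() only after releasing the mnt write reference, sidestepping the ->open_mutex vs. superblock write-lock ordering described in the comment above. A trimmed sketch of the caller-side pattern, mirroring btrfs_ioctl_rm_dev() (error handling abbreviated):

static long rm_dev_example(struct btrfs_fs_info *fs_info, const char *name)
{
        struct block_device *bdev = NULL;
        fmode_t mode;
        long ret;

        ret = btrfs_rm_device(fs_info, name, 0, &bdev, &mode);

        /* ... finish the exclusive op, drop mnt write access ... */

        /* Only now, outside the sb write lock, release the block device. */
        if (bdev)
                blkdev_put(bdev, mode);
        return ret;
}
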
index a8d49e8..11b8855 100644 (file)
@@ -546,7 +546,7 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
                return err;
 
        if (test_opt(ctx, DAX_ALWAYS) &&
-           !bdev_dax_supported(sb->s_bdev, EROFS_BLKSIZ)) {
+           !dax_supported(sbi->dax_dev, sb->s_bdev, EROFS_BLKSIZ, 0, bdev_nr_sectors(sb->s_bdev))) {
                errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
                clear_opt(ctx, DAX_ALWAYS);
        }
index 648ed77..06f4c5a 100644 (file)
@@ -1686,8 +1686,8 @@ static int ep_send_events(struct eventpoll *ep,
                if (!revents)
                        continue;
 
-               if (__put_user(revents, &events->events) ||
-                   __put_user(epi->event.data, &events->data)) {
+               events = epoll_put_uevent(revents, epi->event.data, events);
+               if (!events) {
                        list_add(&epi->rdllink, &txlist);
                        ep_pm_stay_awake(epi);
                        if (!res)
@@ -1695,7 +1695,6 @@ static int ep_send_events(struct eventpoll *ep,
                        break;
                }
                res++;
-               events++;
                if (epi->event.events & EPOLLONESHOT)
                        epi->event.events &= EP_PRIVATE_BITS;
                else if (!(epi->event.events & EPOLLET)) {
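
epoll_put_uevent() replaces the open-coded pair of __put_user() calls; roughly, the generic helper stores the event mask and data into the user buffer and returns the advanced pointer, or NULL on a fault so the caller can re-queue the entry. The sketch below is reconstructed from the call site (the real definition lives in the eventpoll header, and an architecture, such as the ARM OABI compat code in this same merge, may override it):

static inline struct epoll_event __user *
epoll_put_uevent(__poll_t revents, __u64 data,
                 struct epoll_event __user *uevent)
{
        if (__put_user(revents, &uevent->events) ||
            __put_user(data, &uevent->data))
                return NULL;            /* fault: caller re-queues the item */

        return uevent + 1;              /* success: advance to the next slot */
}
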
index 987bcf3..d8d580b 100644 (file)
@@ -946,7 +946,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
        blocksize = BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
 
        if (test_opt(sb, DAX)) {
-               if (!bdev_dax_supported(sb->s_bdev, blocksize)) {
+               if (!dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                               bdev_nr_sectors(sb->s_bdev))) {
                        ext2_msg(sb, KERN_ERR,
                                "DAX unsupported by block device. Turning off DAX.");
                        clear_opt(sbi->s_mount_opt, DAX);
index 136940a..0775950 100644 (file)
@@ -4287,7 +4287,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                goto failed_mount;
        }
 
-       if (bdev_dax_supported(sb->s_bdev, blocksize))
+       if (dax_supported(dax_dev, sb->s_bdev, blocksize, 0,
+                       bdev_nr_sectors(sb->s_bdev)))
                set_bit(EXT4_FLAGS_BDEV_IS_DAX, &sbi->s_ext4_flags);
 
        if (sbi->s_mount_opt & EXT4_MOUNT_DAX_ALWAYS) {
index 90b8d87..58b9067 100644 (file)
@@ -209,21 +209,28 @@ SYSCALL_DEFINE3(sysfs, int, option, unsigned long, arg1, unsigned long, arg2)
 }
 #endif
 
-int __init get_filesystem_list(char *buf)
+int __init list_bdev_fs_names(char *buf, size_t size)
 {
-       int len = 0;
-       struct file_system_type * tmp;
+       struct file_system_type *p;
+       size_t len;
+       int count = 0;
 
        read_lock(&file_systems_lock);
-       tmp = file_systems;
-       while (tmp && len < PAGE_SIZE - 80) {
-               len += sprintf(buf+len, "%s\t%s\n",
-                       (tmp->fs_flags & FS_REQUIRES_DEV) ? "" : "nodev",
-                       tmp->name);
-               tmp = tmp->next;
+       for (p = file_systems; p; p = p->next) {
+               if (!(p->fs_flags & FS_REQUIRES_DEV))
+                       continue;
+               len = strlen(p->name) + 1;
+               if (len > size) {
+                       pr_warn("%s: truncating file system list\n", __func__);
+                       break;
+               }
+               memcpy(buf, p->name, len);
+               buf += len;
+               size -= len;
+               count++;
        }
        read_unlock(&file_systems_lock);
-       return len;
+       return count;
 }
 
 #ifdef CONFIG_PROC_FS
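
list_bdev_fs_names() now copies only the names of filesystems that require a block device, packed back to back as NUL-terminated strings, and returns how many it copied, instead of the old newline-separated text listing. A hedged sketch of how a caller can walk that buffer (illustrative only; the boot-time mount code is the real consumer):

/* Walk the `count` NUL-terminated names list_bdev_fs_names() copied into buf. */
static void for_each_bdev_fs_name(const char *buf, int count)
{
        const char *p = buf;
        int i;

        for (i = 0; i < count; i++) {
                pr_info("block-device filesystem: %s\n", p);
                p += strlen(p) + 1;     /* skip past the terminating NUL */
        }
}
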
index 6e15434..3130f85 100644 (file)
@@ -1985,8 +1985,8 @@ static int gfs2_setattr(struct user_namespace *mnt_userns,
        if (error)
                goto out;
 
-       error = -EPERM;
-       if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+       error = may_setattr(&init_user_ns, inode, attr->ia_valid);
+       if (error)
                goto error;
 
        error = setattr_prepare(&init_user_ns, dentry, attr);
index 7d0c3db..d5c9d88 100644 (file)
@@ -381,6 +381,7 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end,
 static const struct file_operations hostfs_file_fops = {
        .llseek         = generic_file_llseek,
        .splice_read    = generic_file_splice_read,
+       .splice_write   = iter_file_splice_write,
        .read_iter      = generic_file_read_iter,
        .write_iter     = generic_file_write_iter,
        .mmap           = generic_file_mmap,
index d816c09..855ea54 100644 (file)
@@ -3480,6 +3480,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                if (req->flags & REQ_F_NOWAIT)
                        goto done;
                /* some cases will consume bytes even on error returns */
+               iov_iter_reexpand(iter, iter->count + iter->truncated);
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = 0;
        } else if (ret == -EIOCBQUEUED) {
@@ -3619,6 +3620,7 @@ done:
        } else {
 copy_iov:
                /* some cases will consume bytes even on error returns */
+               iov_iter_reexpand(iter, iter->count + iter->truncated);
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
                return ret ?: -EAGAIN;
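
Both error paths now re-expand the iterator before reverting it: iter->truncated holds the number of bytes a previous iov_iter_truncate() shaved off, so adding it back first means the subsequent revert restores the iterator to its full original state before the request is retried or punted to async. A small sketch of the pattern, assuming iov_iter carries that truncated count as it does in this kernel version:

/* io_size is the byte count the request originally asked for. */
static void restore_iter_for_retry(struct iov_iter *iter, size_t io_size)
{
        /* Give back any bytes truncation trimmed off the iterator... */
        iov_iter_reexpand(iter, iter->count + iter->truncated);
        /* ...then rewind whatever the failed attempt already consumed. */
        iov_iter_revert(iter, io_size - iov_iter_count(iter));
}
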
index 2243a2c..8317f7c 100644 (file)
@@ -28,37 +28,60 @@ static int try_to_realloc_ndr_blob(struct ndr *n, size_t sz)
        return 0;
 }
 
-static void ndr_write_int16(struct ndr *n, __u16 value)
+static int ndr_write_int16(struct ndr *n, __u16 value)
 {
-       if (n->length <= n->offset + sizeof(value))
-               try_to_realloc_ndr_blob(n, sizeof(value));
+       if (n->length <= n->offset + sizeof(value)) {
+               int ret;
+
+               ret = try_to_realloc_ndr_blob(n, sizeof(value));
+               if (ret)
+                       return ret;
+       }
 
        *(__le16 *)ndr_get_field(n) = cpu_to_le16(value);
        n->offset += sizeof(value);
+       return 0;
 }
 
-static void ndr_write_int32(struct ndr *n, __u32 value)
+static int ndr_write_int32(struct ndr *n, __u32 value)
 {
-       if (n->length <= n->offset + sizeof(value))
-               try_to_realloc_ndr_blob(n, sizeof(value));
+       if (n->length <= n->offset + sizeof(value)) {
+               int ret;
+
+               ret = try_to_realloc_ndr_blob(n, sizeof(value));
+               if (ret)
+                       return ret;
+       }
 
        *(__le32 *)ndr_get_field(n) = cpu_to_le32(value);
        n->offset += sizeof(value);
+       return 0;
 }
 
-static void ndr_write_int64(struct ndr *n, __u64 value)
+static int ndr_write_int64(struct ndr *n, __u64 value)
 {
-       if (n->length <= n->offset + sizeof(value))
-               try_to_realloc_ndr_blob(n, sizeof(value));
+       if (n->length <= n->offset + sizeof(value)) {
+               int ret;
+
+               ret = try_to_realloc_ndr_blob(n, sizeof(value));
+               if (ret)
+                       return ret;
+       }
 
        *(__le64 *)ndr_get_field(n) = cpu_to_le64(value);
        n->offset += sizeof(value);
+       return 0;
 }
 
 static int ndr_write_bytes(struct ndr *n, void *value, size_t sz)
 {
-       if (n->length <= n->offset + sz)
-               try_to_realloc_ndr_blob(n, sz);
+       if (n->length <= n->offset + sz) {
+               int ret;
+
+               ret = try_to_realloc_ndr_blob(n, sz);
+               if (ret)
+                       return ret;
+       }
 
        memcpy(ndr_get_field(n), value, sz);
        n->offset += sz;
@@ -70,8 +93,13 @@ static int ndr_write_string(struct ndr *n, char *value)
        size_t sz;
 
        sz = strlen(value) + 1;
-       if (n->length <= n->offset + sz)
-               try_to_realloc_ndr_blob(n, sz);
+       if (n->length <= n->offset + sz) {
+               int ret;
+
+               ret = try_to_realloc_ndr_blob(n, sz);
+               if (ret)
+                       return ret;
+       }
 
        memcpy(ndr_get_field(n), value, sz);
        n->offset += sz;
@@ -81,9 +109,14 @@ static int ndr_write_string(struct ndr *n, char *value)
 
 static int ndr_read_string(struct ndr *n, void *value, size_t sz)
 {
-       int len = strnlen(ndr_get_field(n), sz);
+       int len;
 
-       memcpy(value, ndr_get_field(n), len);
+       if (n->offset + sz > n->length)
+               return -EINVAL;
+
+       len = strnlen(ndr_get_field(n), sz);
+       if (value)
+               memcpy(value, ndr_get_field(n), len);
        len++;
        n->offset += len;
        n->offset = ALIGN(n->offset, 2);
@@ -92,41 +125,52 @@ static int ndr_read_string(struct ndr *n, void *value, size_t sz)
 
 static int ndr_read_bytes(struct ndr *n, void *value, size_t sz)
 {
-       memcpy(value, ndr_get_field(n), sz);
+       if (n->offset + sz > n->length)
+               return -EINVAL;
+
+       if (value)
+               memcpy(value, ndr_get_field(n), sz);
        n->offset += sz;
        return 0;
 }
 
-static __u16 ndr_read_int16(struct ndr *n)
+static int ndr_read_int16(struct ndr *n, __u16 *value)
 {
-       __u16 ret;
+       if (n->offset + sizeof(__u16) > n->length)
+               return -EINVAL;
 
-       ret = le16_to_cpu(*(__le16 *)ndr_get_field(n));
+       if (value)
+               *value = le16_to_cpu(*(__le16 *)ndr_get_field(n));
        n->offset += sizeof(__u16);
-       return ret;
+       return 0;
 }
 
-static __u32 ndr_read_int32(struct ndr *n)
+static int ndr_read_int32(struct ndr *n, __u32 *value)
 {
-       __u32 ret;
+       if (n->offset + sizeof(__u32) > n->length)
+               return 0;
 
-       ret = le32_to_cpu(*(__le32 *)ndr_get_field(n));
+       if (value)
+               *value = le32_to_cpu(*(__le32 *)ndr_get_field(n));
        n->offset += sizeof(__u32);
-       return ret;
+       return 0;
 }
 
-static __u64 ndr_read_int64(struct ndr *n)
+static int ndr_read_int64(struct ndr *n, __u64 *value)
 {
-       __u64 ret;
+       if (n->offset + sizeof(__u64) > n->length)
+               return -EINVAL;
 
-       ret = le64_to_cpu(*(__le64 *)ndr_get_field(n));
+       if (value)
+               *value = le64_to_cpu(*(__le64 *)ndr_get_field(n));
        n->offset += sizeof(__u64);
-       return ret;
+       return 0;
 }
 
 int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 {
        char hex_attr[12] = {0};
+       int ret;
 
        n->offset = 0;
        n->length = 1024;
@@ -136,97 +180,161 @@ int ndr_encode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 
        if (da->version == 3) {
                snprintf(hex_attr, 10, "0x%x", da->attr);
-               ndr_write_string(n, hex_attr);
+               ret = ndr_write_string(n, hex_attr);
        } else {
-               ndr_write_string(n, "");
+               ret = ndr_write_string(n, "");
        }
-       ndr_write_int16(n, da->version);
-       ndr_write_int32(n, da->version);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int16(n, da->version);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, da->version);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, da->flags);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, da->attr);
+       if (ret)
+               return ret;
 
-       ndr_write_int32(n, da->flags);
-       ndr_write_int32(n, da->attr);
        if (da->version == 3) {
-               ndr_write_int32(n, da->ea_size);
-               ndr_write_int64(n, da->size);
-               ndr_write_int64(n, da->alloc_size);
+               ret = ndr_write_int32(n, da->ea_size);
+               if (ret)
+                       return ret;
+               ret = ndr_write_int64(n, da->size);
+               if (ret)
+                       return ret;
+               ret = ndr_write_int64(n, da->alloc_size);
        } else {
-               ndr_write_int64(n, da->itime);
+               ret = ndr_write_int64(n, da->itime);
        }
-       ndr_write_int64(n, da->create_time);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int64(n, da->create_time);
+       if (ret)
+               return ret;
+
        if (da->version == 3)
-               ndr_write_int64(n, da->change_time);
-       return 0;
+               ret = ndr_write_int64(n, da->change_time);
+       return ret;
 }
 
 int ndr_decode_dos_attr(struct ndr *n, struct xattr_dos_attrib *da)
 {
-       char *hex_attr;
-       int version2;
-
-       hex_attr = kzalloc(n->length, GFP_KERNEL);
-       if (!hex_attr)
-               return -ENOMEM;
+       char hex_attr[12];
+       unsigned int version2;
+       int ret;
 
        n->offset = 0;
-       ndr_read_string(n, hex_attr, n->length);
-       kfree(hex_attr);
-       da->version = ndr_read_int16(n);
+       ret = ndr_read_string(n, hex_attr, sizeof(hex_attr));
+       if (ret)
+               return ret;
+
+       ret = ndr_read_int16(n, &da->version);
+       if (ret)
+               return ret;
 
        if (da->version != 3 && da->version != 4) {
                pr_err("v%d version is not supported\n", da->version);
                return -EINVAL;
        }
 
-       version2 = ndr_read_int32(n);
+       ret = ndr_read_int32(n, &version2);
+       if (ret)
+               return ret;
+
        if (da->version != version2) {
                pr_err("ndr version mismatched(version: %d, version2: %d)\n",
                       da->version, version2);
                return -EINVAL;
        }
 
-       ndr_read_int32(n);
-       da->attr = ndr_read_int32(n);
+       ret = ndr_read_int32(n, NULL);
+       if (ret)
+               return ret;
+
+       ret = ndr_read_int32(n, &da->attr);
+       if (ret)
+               return ret;
+
        if (da->version == 4) {
-               da->itime = ndr_read_int64(n);
-               da->create_time = ndr_read_int64(n);
+               ret = ndr_read_int64(n, &da->itime);
+               if (ret)
+                       return ret;
+
+               ret = ndr_read_int64(n, &da->create_time);
        } else {
-               ndr_read_int32(n);
-               ndr_read_int64(n);
-               ndr_read_int64(n);
-               da->create_time = ndr_read_int64(n);
-               ndr_read_int64(n);
+               ret = ndr_read_int32(n, NULL);
+               if (ret)
+                       return ret;
+
+               ret = ndr_read_int64(n, NULL);
+               if (ret)
+                       return ret;
+
+               ret = ndr_read_int64(n, NULL);
+               if (ret)
+                       return ret;
+
+               ret = ndr_read_int64(n, &da->create_time);
+               if (ret)
+                       return ret;
+
+               ret = ndr_read_int64(n, NULL);
        }
 
-       return 0;
+       return ret;
 }
 
 static int ndr_encode_posix_acl_entry(struct ndr *n, struct xattr_smb_acl *acl)
 {
-       int i;
+       int i, ret;
+
+       ret = ndr_write_int32(n, acl->count);
+       if (ret)
+               return ret;
 
-       ndr_write_int32(n, acl->count);
        n->offset = ALIGN(n->offset, 8);
-       ndr_write_int32(n, acl->count);
-       ndr_write_int32(n, 0);
+       ret = ndr_write_int32(n, acl->count);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, 0);
+       if (ret)
+               return ret;
 
        for (i = 0; i < acl->count; i++) {
                n->offset = ALIGN(n->offset, 8);
-               ndr_write_int16(n, acl->entries[i].type);
-               ndr_write_int16(n, acl->entries[i].type);
+               ret = ndr_write_int16(n, acl->entries[i].type);
+               if (ret)
+                       return ret;
+
+               ret = ndr_write_int16(n, acl->entries[i].type);
+               if (ret)
+                       return ret;
 
                if (acl->entries[i].type == SMB_ACL_USER) {
                        n->offset = ALIGN(n->offset, 8);
-                       ndr_write_int64(n, acl->entries[i].uid);
+                       ret = ndr_write_int64(n, acl->entries[i].uid);
                } else if (acl->entries[i].type == SMB_ACL_GROUP) {
                        n->offset = ALIGN(n->offset, 8);
-                       ndr_write_int64(n, acl->entries[i].gid);
+                       ret = ndr_write_int64(n, acl->entries[i].gid);
                }
+               if (ret)
+                       return ret;
 
                /* push permission */
-               ndr_write_int32(n, acl->entries[i].perm);
+               ret = ndr_write_int32(n, acl->entries[i].perm);
        }
 
-       return 0;
+       return ret;
 }
 
 int ndr_encode_posix_acl(struct ndr *n,
@@ -235,7 +343,8 @@ int ndr_encode_posix_acl(struct ndr *n,
                         struct xattr_smb_acl *acl,
                         struct xattr_smb_acl *def_acl)
 {
-       int ref_id = 0x00020000;
+       unsigned int ref_id = 0x00020000;
+       int ret;
 
        n->offset = 0;
        n->length = 1024;
@@ -245,35 +354,46 @@ int ndr_encode_posix_acl(struct ndr *n,
 
        if (acl) {
                /* ACL ACCESS */
-               ndr_write_int32(n, ref_id);
+               ret = ndr_write_int32(n, ref_id);
                ref_id += 4;
        } else {
-               ndr_write_int32(n, 0);
+               ret = ndr_write_int32(n, 0);
        }
+       if (ret)
+               return ret;
 
        if (def_acl) {
                /* DEFAULT ACL ACCESS */
-               ndr_write_int32(n, ref_id);
+               ret = ndr_write_int32(n, ref_id);
                ref_id += 4;
        } else {
-               ndr_write_int32(n, 0);
+               ret = ndr_write_int32(n, 0);
        }
-
-       ndr_write_int64(n, from_kuid(user_ns, inode->i_uid));
-       ndr_write_int64(n, from_kgid(user_ns, inode->i_gid));
-       ndr_write_int32(n, inode->i_mode);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int64(n, from_kuid(&init_user_ns, i_uid_into_mnt(user_ns, inode)));
+       if (ret)
+               return ret;
+       ret = ndr_write_int64(n, from_kgid(&init_user_ns, i_gid_into_mnt(user_ns, inode)));
+       if (ret)
+               return ret;
+       ret = ndr_write_int32(n, inode->i_mode);
+       if (ret)
+               return ret;
 
        if (acl) {
-               ndr_encode_posix_acl_entry(n, acl);
-               if (def_acl)
-                       ndr_encode_posix_acl_entry(n, def_acl);
+               ret = ndr_encode_posix_acl_entry(n, acl);
+               if (def_acl && !ret)
+                       ret = ndr_encode_posix_acl_entry(n, def_acl);
        }
-       return 0;
+       return ret;
 }
 
 int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 {
-       int ref_id = 0x00020004;
+       unsigned int ref_id = 0x00020004;
+       int ret;
 
        n->offset = 0;
        n->length = 2048;
@@ -281,36 +401,65 @@ int ndr_encode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
        if (!n->data)
                return -ENOMEM;
 
-       ndr_write_int16(n, acl->version);
-       ndr_write_int32(n, acl->version);
-       ndr_write_int16(n, 2);
-       ndr_write_int32(n, ref_id);
+       ret = ndr_write_int16(n, acl->version);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, acl->version);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int16(n, 2);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int32(n, ref_id);
+       if (ret)
+               return ret;
 
        /* push hash type and hash 64bytes */
-       ndr_write_int16(n, acl->hash_type);
-       ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
-       ndr_write_bytes(n, acl->desc, acl->desc_len);
-       ndr_write_int64(n, acl->current_time);
-       ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+       ret = ndr_write_int16(n, acl->hash_type);
+       if (ret)
+               return ret;
 
-       /* push ndr for security descriptor */
-       ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+       ret = ndr_write_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+       if (ret)
+               return ret;
 
-       return 0;
+       ret = ndr_write_bytes(n, acl->desc, acl->desc_len);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_int64(n, acl->current_time);
+       if (ret)
+               return ret;
+
+       ret = ndr_write_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+       if (ret)
+               return ret;
+
+       /* push ndr for security descriptor */
+       ret = ndr_write_bytes(n, acl->sd_buf, acl->sd_size);
+       return ret;
 }
 
 int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
 {
-       int version2;
+       unsigned int version2;
+       int ret;
 
        n->offset = 0;
-       acl->version = ndr_read_int16(n);
+       ret = ndr_read_int16(n, &acl->version);
+       if (ret)
+               return ret;
        if (acl->version != 4) {
                pr_err("v%d version is not supported\n", acl->version);
                return -EINVAL;
        }
 
-       version2 = ndr_read_int32(n);
+       ret = ndr_read_int32(n, &version2);
+       if (ret)
+               return ret;
        if (acl->version != version2) {
                pr_err("ndr version mismatched(version: %d, version2: %d)\n",
                       acl->version, version2);
@@ -318,11 +467,22 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
        }
 
        /* Read Level */
-       ndr_read_int16(n);
+       ret = ndr_read_int16(n, NULL);
+       if (ret)
+               return ret;
+
        /* Read Ref Id */
-       ndr_read_int32(n);
-       acl->hash_type = ndr_read_int16(n);
-       ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+       ret = ndr_read_int32(n, NULL);
+       if (ret)
+               return ret;
+
+       ret = ndr_read_int16(n, &acl->hash_type);
+       if (ret)
+               return ret;
+
+       ret = ndr_read_bytes(n, acl->hash, XATTR_SD_HASH_SIZE);
+       if (ret)
+               return ret;
 
        ndr_read_bytes(n, acl->desc, 10);
        if (strncmp(acl->desc, "posix_acl", 9)) {
@@ -331,15 +491,20 @@ int ndr_decode_v4_ntacl(struct ndr *n, struct xattr_ntacl *acl)
        }
 
        /* Read Time */
-       ndr_read_int64(n);
+       ret = ndr_read_int64(n, NULL);
+       if (ret)
+               return ret;
+
        /* Read Posix ACL hash */
-       ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+       ret = ndr_read_bytes(n, acl->posix_acl_hash, XATTR_SD_HASH_SIZE);
+       if (ret)
+               return ret;
+
        acl->sd_size = n->length - n->offset;
        acl->sd_buf = kzalloc(acl->sd_size, GFP_KERNEL);
        if (!acl->sd_buf)
                return -ENOMEM;
 
-       ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
-
-       return 0;
+       ret = ndr_read_bytes(n, acl->sd_buf, acl->sd_size);
+       return ret;
 }
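
All of the converted ndr helpers now follow the same shape: validate the remaining buffer (or grow it, for the writers) before touching the payload, return an error on failure, and only then advance the offset. The pattern, shown for a hypothetical one-byte reader that is not part of the patch:

static int ndr_read_int8(struct ndr *n, __u8 *value)
{
        /* Refuse to read past the end of the received blob. */
        if (n->offset + sizeof(__u8) > n->length)
                return -EINVAL;

        if (value)
                *value = *(__u8 *)ndr_get_field(n);
        n->offset += sizeof(__u8);
        return 0;
}
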
index 6ace6c2..16b6236 100644 (file)
@@ -1614,9 +1614,11 @@ void create_posix_rsp_buf(char *cc, struct ksmbd_file *fp)
        buf->nlink = cpu_to_le32(inode->i_nlink);
        buf->reparse_tag = cpu_to_le32(fp->volatile_id);
        buf->mode = cpu_to_le32(inode->i_mode);
-       id_to_sid(from_kuid(user_ns, inode->i_uid),
+       id_to_sid(from_kuid_munged(&init_user_ns,
+                                  i_uid_into_mnt(user_ns, inode)),
                  SIDNFS_USER, (struct smb_sid *)&buf->SidBuffer[0]);
-       id_to_sid(from_kgid(user_ns, inode->i_gid),
+       id_to_sid(from_kgid_munged(&init_user_ns,
+                                  i_gid_into_mnt(user_ns, inode)),
                  SIDNFS_GROUP, (struct smb_sid *)&buf->SidBuffer[20]);
 }
 
index d329ea4..c86164d 100644 (file)
@@ -2381,10 +2381,12 @@ static int smb2_create_sd_buffer(struct ksmbd_work *work,
                            le32_to_cpu(sd_buf->ccontext.DataLength), true);
 }
 
-static void ksmbd_acls_fattr(struct smb_fattr *fattr, struct inode *inode)
+static void ksmbd_acls_fattr(struct smb_fattr *fattr,
+                            struct user_namespace *mnt_userns,
+                            struct inode *inode)
 {
-       fattr->cf_uid = inode->i_uid;
-       fattr->cf_gid = inode->i_gid;
+       fattr->cf_uid = i_uid_into_mnt(mnt_userns, inode);
+       fattr->cf_gid = i_gid_into_mnt(mnt_userns, inode);
        fattr->cf_mode = inode->i_mode;
        fattr->cf_acls = NULL;
        fattr->cf_dacls = NULL;
@@ -2893,7 +2895,7 @@ int smb2_open(struct ksmbd_work *work)
                                        struct smb_ntsd *pntsd;
                                        int pntsd_size, ace_num = 0;
 
-                                       ksmbd_acls_fattr(&fattr, inode);
+                                       ksmbd_acls_fattr(&fattr, user_ns, inode);
                                        if (fattr.cf_acls)
                                                ace_num = fattr.cf_acls->a_count;
                                        if (fattr.cf_dacls)
@@ -3324,7 +3326,6 @@ static int dentry_name(struct ksmbd_dir_info *d_info, int info_level)
  */
 static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                                       struct ksmbd_dir_info *d_info,
-                                      struct user_namespace *user_ns,
                                       struct ksmbd_kstat *ksmbd_kstat)
 {
        int next_entry_offset = 0;
@@ -3478,9 +3479,9 @@ static int smb2_populate_readdir_entry(struct ksmbd_conn *conn, int info_level,
                        S_ISDIR(ksmbd_kstat->kstat->mode) ? ATTR_DIRECTORY_LE : ATTR_ARCHIVE_LE;
                if (d_info->hide_dot_file && d_info->name[0] == '.')
                        posix_info->DosAttributes |= ATTR_HIDDEN_LE;
-               id_to_sid(from_kuid(user_ns, ksmbd_kstat->kstat->uid),
+               id_to_sid(from_kuid_munged(&init_user_ns, ksmbd_kstat->kstat->uid),
                          SIDNFS_USER, (struct smb_sid *)&posix_info->SidBuffer[0]);
-               id_to_sid(from_kgid(user_ns, ksmbd_kstat->kstat->gid),
+               id_to_sid(from_kgid_munged(&init_user_ns, ksmbd_kstat->kstat->gid),
                          SIDNFS_GROUP, (struct smb_sid *)&posix_info->SidBuffer[20]);
                memcpy(posix_info->name, conv_name, conv_len);
                posix_info->name_len = cpu_to_le32(conv_len);
@@ -3543,9 +3544,9 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
                        return -EINVAL;
 
                lock_dir(priv->dir_fp);
-               dent = lookup_one_len(priv->d_info->name,
-                                     priv->dir_fp->filp->f_path.dentry,
-                                     priv->d_info->name_len);
+               dent = lookup_one(user_ns, priv->d_info->name,
+                                 priv->dir_fp->filp->f_path.dentry,
+                                 priv->d_info->name_len);
                unlock_dir(priv->dir_fp);
 
                if (IS_ERR(dent)) {
@@ -3571,7 +3572,6 @@ static int process_query_dir_entries(struct smb2_query_dir_private *priv)
                rc = smb2_populate_readdir_entry(priv->work->conn,
                                                 priv->info_level,
                                                 priv->d_info,
-                                                user_ns,
                                                 &ksmbd_kstat);
                dput(dent);
                if (rc)
@@ -5008,7 +5008,7 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
 
        user_ns = file_mnt_user_ns(fp->filp);
        inode = file_inode(fp->filp);
-       ksmbd_acls_fattr(&fattr, inode);
+       ksmbd_acls_fattr(&fattr, user_ns, inode);
 
        if (test_share_config_flag(work->tcon->share_conf,
                                   KSMBD_SHARE_FLAG_ACL_XATTR))
@@ -5246,7 +5246,9 @@ int smb2_echo(struct ksmbd_work *work)
        return 0;
 }
 
-static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
+static int smb2_rename(struct ksmbd_work *work,
+                      struct ksmbd_file *fp,
+                      struct user_namespace *user_ns,
                       struct smb2_file_rename_info *file_info,
                       struct nls_table *local_nls)
 {
@@ -5310,7 +5312,7 @@ static int smb2_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
                if (rc)
                        goto out;
 
-               rc = ksmbd_vfs_setxattr(file_mnt_user_ns(fp->filp),
+               rc = ksmbd_vfs_setxattr(user_ns,
                                        fp->filp->f_path.dentry,
                                        xattr_stream_name,
                                        NULL, 0, 0);
@@ -5438,11 +5440,11 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
 {
        struct smb2_file_all_info *file_info;
        struct iattr attrs;
-       struct iattr temp_attrs;
+       struct timespec64 ctime;
        struct file *filp;
        struct inode *inode;
        struct user_namespace *user_ns;
-       int rc;
+       int rc = 0;
 
        if (!(fp->daccess & FILE_WRITE_ATTRIBUTES_LE))
                return -EACCES;
@@ -5462,11 +5464,11 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
        }
 
        if (file_info->ChangeTime) {
-               temp_attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
-               attrs.ia_ctime = temp_attrs.ia_ctime;
+               attrs.ia_ctime = ksmbd_NTtimeToUnix(file_info->ChangeTime);
+               ctime = attrs.ia_ctime;
                attrs.ia_valid |= ATTR_CTIME;
        } else {
-               temp_attrs.ia_ctime = inode->i_ctime;
+               ctime = inode->i_ctime;
        }
 
        if (file_info->LastWriteTime) {
@@ -5505,13 +5507,6 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
                rc = 0;
        }
 
-       /*
-        * HACK : set ctime here to avoid ctime changed
-        * when file_info->ChangeTime is zero.
-        */
-       attrs.ia_ctime = temp_attrs.ia_ctime;
-       attrs.ia_valid |= ATTR_CTIME;
-
        if (attrs.ia_valid) {
                struct dentry *dentry = filp->f_path.dentry;
                struct inode *inode = d_inode(dentry);
@@ -5519,17 +5514,15 @@ static int set_file_basic_info(struct ksmbd_file *fp, char *buf,
                if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                        return -EACCES;
 
-               rc = setattr_prepare(user_ns, dentry, &attrs);
-               if (rc)
-                       return -EINVAL;
-
                inode_lock(inode);
-               setattr_copy(user_ns, inode, &attrs);
-               attrs.ia_valid &= ~ATTR_CTIME;
                rc = notify_change(user_ns, dentry, &attrs, NULL);
+               if (!rc) {
+                       inode->i_ctime = ctime;
+                       mark_inode_dirty(inode);
+               }
                inode_unlock(inode);
        }
-       return 0;
+       return rc;
 }
 
 static int set_file_allocation_info(struct ksmbd_work *work,
@@ -5624,6 +5617,7 @@ static int set_end_of_file_info(struct ksmbd_work *work, struct ksmbd_file *fp,
 static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
                           char *buf)
 {
+       struct user_namespace *user_ns;
        struct ksmbd_file *parent_fp;
        struct dentry *parent;
        struct dentry *dentry = fp->filp->f_path.dentry;
@@ -5634,11 +5628,12 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
                return -EACCES;
        }
 
+       user_ns = file_mnt_user_ns(fp->filp);
        if (ksmbd_stream_fd(fp))
                goto next;
 
        parent = dget_parent(dentry);
-       ret = ksmbd_vfs_lock_parent(parent, dentry);
+       ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
        if (ret) {
                dput(parent);
                return ret;
@@ -5655,7 +5650,7 @@ static int set_rename_info(struct ksmbd_work *work, struct ksmbd_file *fp,
                }
        }
 next:
-       return smb2_rename(work, fp,
+       return smb2_rename(work, fp, user_ns,
                           (struct smb2_file_rename_info *)buf,
                           work->sess->conn->local_nls);
 }
@@ -7116,8 +7111,8 @@ static int fsctl_query_iface_info_ioctl(struct ksmbd_conn *conn,
                        netdev->ethtool_ops->get_link_ksettings(netdev, &cmd);
                        speed = cmd.base.speed;
                } else {
-                       pr_err("%s %s\n", netdev->name,
-                              "speed is unknown, defaulting to 1Gb/sec");
+                       ksmbd_debug(SMB, "%s %s\n", netdev->name,
+                                   "speed is unknown, defaulting to 1Gb/sec");
                        speed = SPEED_1000;
                }
 
index b108b91..43d3123 100644 (file)
@@ -291,7 +291,6 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
                                      char *search_pattern,
                                      int (*fn)(struct ksmbd_conn *, int,
                                                struct ksmbd_dir_info *,
-                                               struct user_namespace *,
                                                struct ksmbd_kstat *))
 {
        int i, rc = 0;
@@ -322,8 +321,7 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work, int info_level,
                                                    user_ns,
                                                    dir->filp->f_path.dentry->d_parent,
                                                    &ksmbd_kstat);
-                       rc = fn(conn, info_level, d_info,
-                               user_ns, &ksmbd_kstat);
+                       rc = fn(conn, info_level, d_info, &ksmbd_kstat);
                        if (rc)
                                break;
                        if (d_info->out_buf_len <= 0)
index eb667d8..57c667c 100644 (file)
@@ -511,7 +511,6 @@ int ksmbd_populate_dot_dotdot_entries(struct ksmbd_work *work,
                                      int (*fn)(struct ksmbd_conn *,
                                                int,
                                                struct ksmbd_dir_info *,
-                                               struct user_namespace *,
                                                struct ksmbd_kstat *));
 
 int ksmbd_extract_shortname(struct ksmbd_conn *conn,
index 5456e3a..0a95cde 100644 (file)
@@ -274,24 +274,34 @@ static int sid_to_id(struct user_namespace *user_ns,
                uid_t id;
 
                id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
-               if (id > 0) {
-                       uid = make_kuid(user_ns, id);
-                       if (uid_valid(uid) && kuid_has_mapping(user_ns, uid)) {
-                               fattr->cf_uid = uid;
-                               rc = 0;
-                       }
+               /*
+                * Translate raw sid into kuid in the server's user
+                * namespace.
+                */
+               uid = make_kuid(&init_user_ns, id);
+
+               /* If this is an idmapped mount, apply the idmapping. */
+               uid = kuid_from_mnt(user_ns, uid);
+               if (uid_valid(uid)) {
+                       fattr->cf_uid = uid;
+                       rc = 0;
                }
        } else {
                kgid_t gid;
                gid_t id;
 
                id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
-               if (id > 0) {
-                       gid = make_kgid(user_ns, id);
-                       if (gid_valid(gid) && kgid_has_mapping(user_ns, gid)) {
-                               fattr->cf_gid = gid;
-                               rc = 0;
-                       }
+               /*
+                * Translate raw sid into kgid in the server's user
+                * namespace.
+                */
+               gid = make_kgid(&init_user_ns, id);
+
+               /* If this is an idmapped mount, apply the idmapping. */
+               gid = kgid_from_mnt(user_ns, gid);
+               if (gid_valid(gid)) {
+                       fattr->cf_gid = gid;
+                       rc = 0;
                }
        }
 
@@ -587,14 +597,14 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
                        uid_t uid;
                        unsigned int sid_type = SIDOWNER;
 
-                       uid = from_kuid(user_ns, pace->e_uid);
+                       uid = posix_acl_uid_translate(user_ns, pace);
                        if (!uid)
                                sid_type = SIDUNIX_USER;
                        id_to_sid(uid, sid_type, sid);
                } else if (pace->e_tag == ACL_GROUP) {
                        gid_t gid;
 
-                       gid = from_kgid(user_ns, pace->e_gid);
+                       gid = posix_acl_gid_translate(user_ns, pace);
                        id_to_sid(gid, SIDUNIX_GROUP, sid);
                } else if (pace->e_tag == ACL_OTHER && !nt_aces_num) {
                        smb_copy_sid(sid, &sid_everyone);
@@ -653,12 +663,12 @@ posix_default_acl:
                if (pace->e_tag == ACL_USER) {
                        uid_t uid;
 
-                       uid = from_kuid(user_ns, pace->e_uid);
+                       uid = posix_acl_uid_translate(user_ns, pace);
                        id_to_sid(uid, SIDCREATOR_OWNER, sid);
                } else if (pace->e_tag == ACL_GROUP) {
                        gid_t gid;
 
-                       gid = from_kgid(user_ns, pace->e_gid);
+                       gid = posix_acl_gid_translate(user_ns, pace);
                        id_to_sid(gid, SIDCREATOR_GROUP, sid);
                } else {
                        kfree(sid);
@@ -723,7 +733,7 @@ static void set_mode_dacl(struct user_namespace *user_ns,
        }
 
        /* owner RID */
-       uid = from_kuid(user_ns, fattr->cf_uid);
+       uid = from_kuid(&init_user_ns, fattr->cf_uid);
        if (uid)
                sid = &server_conf.domain_sid;
        else
@@ -739,7 +749,7 @@ static void set_mode_dacl(struct user_namespace *user_ns,
        ace_size = fill_ace_for_sid(pace, &sid_unix_groups,
                                    ACCESS_ALLOWED, 0, fattr->cf_mode, 0070);
        pace->sid.sub_auth[pace->sid.num_subauth++] =
-               cpu_to_le32(from_kgid(user_ns, fattr->cf_gid));
+               cpu_to_le32(from_kgid(&init_user_ns, fattr->cf_gid));
        pace->size = cpu_to_le16(ace_size + 4);
        size += le16_to_cpu(pace->size);
        pace = (struct smb_ace *)((char *)pndace + size);
@@ -880,7 +890,7 @@ int build_sec_desc(struct user_namespace *user_ns,
        if (!nowner_sid_ptr)
                return -ENOMEM;
 
-       uid = from_kuid(user_ns, fattr->cf_uid);
+       uid = from_kuid(&init_user_ns, fattr->cf_uid);
        if (!uid)
                sid_type = SIDUNIX_USER;
        id_to_sid(uid, sid_type, nowner_sid_ptr);
@@ -891,7 +901,7 @@ int build_sec_desc(struct user_namespace *user_ns,
                return -ENOMEM;
        }
 
-       gid = from_kgid(user_ns, fattr->cf_gid);
+       gid = from_kgid(&init_user_ns, fattr->cf_gid);
        id_to_sid(gid, SIDUNIX_GROUP, ngroup_sid_ptr);
 
        offset = sizeof(struct smb_ntsd);
@@ -1234,11 +1244,9 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
                        pa_entry = posix_acls->a_entries;
                        for (i = 0; i < posix_acls->a_count; i++, pa_entry++) {
                                if (pa_entry->e_tag == ACL_USER)
-                                       id = from_kuid(user_ns,
-                                                      pa_entry->e_uid);
+                                       id = posix_acl_uid_translate(user_ns, pa_entry);
                                else if (pa_entry->e_tag == ACL_GROUP)
-                                       id = from_kgid(user_ns,
-                                                      pa_entry->e_gid);
+                                       id = posix_acl_gid_translate(user_ns, pa_entry);
                                else
                                        continue;
 
@@ -1322,22 +1330,31 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
        newattrs.ia_valid |= ATTR_MODE;
        newattrs.ia_mode = (inode->i_mode & ~0777) | (fattr.cf_mode & 0777);
 
-       inode_lock(inode);
-       rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
-       inode_unlock(inode);
-       if (rc)
-               goto out;
-
        ksmbd_vfs_remove_acl_xattrs(user_ns, path->dentry);
        /* Update posix acls */
        if (IS_ENABLED(CONFIG_FS_POSIX_ACL) && fattr.cf_dacls) {
                rc = set_posix_acl(user_ns, inode,
                                   ACL_TYPE_ACCESS, fattr.cf_acls);
-               if (S_ISDIR(inode->i_mode) && fattr.cf_dacls)
+               if (rc < 0)
+                       ksmbd_debug(SMB,
+                                   "Set posix acl(ACL_TYPE_ACCESS) failed, rc : %d\n",
+                                   rc);
+               if (S_ISDIR(inode->i_mode) && fattr.cf_dacls) {
                        rc = set_posix_acl(user_ns, inode,
                                           ACL_TYPE_DEFAULT, fattr.cf_dacls);
+                       if (rc)
+                               ksmbd_debug(SMB,
+                                           "Set posix acl(ACL_TYPE_DEFAULT) failed, rc : %d\n",
+                                           rc);
+               }
        }
 
+       inode_lock(inode);
+       rc = notify_change(user_ns, path->dentry, &newattrs, NULL);
+       inode_unlock(inode);
+       if (rc)
+               goto out;
+
        /* Check it only calling from SD BUFFER context */
        if (type_check && !(le16_to_cpu(pntsd->type) & DACL_PRESENT))
                goto out;
index 940f686..73e08ca 100644 (file)
@@ -209,4 +209,29 @@ int set_info_sec(struct ksmbd_conn *conn, struct ksmbd_tree_connect *tcon,
                 bool type_check);
 void id_to_sid(unsigned int cid, uint sidtype, struct smb_sid *ssid);
 void ksmbd_init_domain(u32 *sub_auth);
+
+static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
+                                           struct posix_acl_entry *pace)
+{
+       kuid_t kuid;
+
+       /* If this is an idmapped mount, apply the idmapping. */
+       kuid = kuid_into_mnt(mnt_userns, pace->e_uid);
+
+       /* Translate the kuid into a userspace id ksmbd would see. */
+       return from_kuid(&init_user_ns, kuid);
+}
+
+static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
+                                           struct posix_acl_entry *pace)
+{
+       kgid_t kgid;
+
+       /* If this is an idmapped mount, apply the idmapping. */
+       kgid = kgid_into_mnt(mnt_userns, pace->e_gid);
+
+       /* Translate the kgid into a userspace id ksmbd would see. */
+       return from_kgid(&init_user_ns, kgid);
+}
+
 #endif /* _SMBACL_H */
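
The two new helpers above make every POSIX ACL id pass through the idmapped
mount before being mapped into the id space ksmbd reports on the wire. As a
minimal sketch of the same two-step pattern for a plain file owner (for
illustration only; ksmbd_report_owner is a hypothetical name, not part of
this change):

    static uid_t ksmbd_report_owner(struct user_namespace *mnt_userns,
                                    struct inode *inode)
    {
            /* Apply the idmapping of the mount, if any. */
            kuid_t kuid = kuid_into_mnt(mnt_userns, inode->i_uid);

            /* Then map into the id space ksmbd reports to clients. */
            return from_kuid(&init_user_ns, kuid);
    }
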
index 58f5300..52b2556 100644 (file)
@@ -1168,7 +1168,7 @@ static int smb_direct_post_send_data(struct smb_direct_transport *t,
                        pr_err("failed to map buffer\n");
                        ret = -ENOMEM;
                        goto err;
-               } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES - 1) {
+               } else if (sg_cnt + msg->num_sge > SMB_DIRECT_MAX_SEND_SGES) {
                        pr_err("buffer not fitted into sges\n");
                        ret = -E2BIG;
                        ib_dma_unmap_sg(t->cm_id->device, sg, sg_cnt,
index aee28ee..b047f29 100644 (file)
@@ -69,14 +69,15 @@ static void ksmbd_vfs_inherit_owner(struct ksmbd_work *work,
  *
  * the reference count of @parent isn't incremented.
  */
-int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child)
+int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+                         struct dentry *child)
 {
        struct dentry *dentry;
        int ret = 0;
 
        inode_lock_nested(d_inode(parent), I_MUTEX_PARENT);
-       dentry = lookup_one_len(child->d_name.name, parent,
-                               child->d_name.len);
+       dentry = lookup_one(user_ns, child->d_name.name, parent,
+                           child->d_name.len);
        if (IS_ERR(dentry)) {
                ret = PTR_ERR(dentry);
                goto out_err;
@@ -102,7 +103,7 @@ int ksmbd_vfs_may_delete(struct user_namespace *user_ns,
        int ret;
 
        parent = dget_parent(dentry);
-       ret = ksmbd_vfs_lock_parent(parent, dentry);
+       ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
        if (ret) {
                dput(parent);
                return ret;
@@ -137,7 +138,7 @@ int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
                *daccess |= FILE_EXECUTE_LE;
 
        parent = dget_parent(dentry);
-       ret = ksmbd_vfs_lock_parent(parent, dentry);
+       ret = ksmbd_vfs_lock_parent(user_ns, parent, dentry);
        if (ret) {
                dput(parent);
                return ret;
@@ -197,6 +198,7 @@ int ksmbd_vfs_create(struct ksmbd_work *work, const char *name, umode_t mode)
  */
 int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
 {
+       struct user_namespace *user_ns;
        struct path path;
        struct dentry *dentry;
        int err;
@@ -210,16 +212,16 @@ int ksmbd_vfs_mkdir(struct ksmbd_work *work, const char *name, umode_t mode)
                return err;
        }
 
+       user_ns = mnt_user_ns(path.mnt);
        mode |= S_IFDIR;
-       err = vfs_mkdir(mnt_user_ns(path.mnt), d_inode(path.dentry),
-                       dentry, mode);
+       err = vfs_mkdir(user_ns, d_inode(path.dentry), dentry, mode);
        if (err) {
                goto out;
        } else if (d_unhashed(dentry)) {
                struct dentry *d;
 
-               d = lookup_one_len(dentry->d_name.name, dentry->d_parent,
-                                  dentry->d_name.len);
+               d = lookup_one(user_ns, dentry->d_name.name, dentry->d_parent,
+                              dentry->d_name.len);
                if (IS_ERR(d)) {
                        err = PTR_ERR(d);
                        goto out;
@@ -582,6 +584,7 @@ int ksmbd_vfs_fsync(struct ksmbd_work *work, u64 fid, u64 p_id)
  */
 int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
 {
+       struct user_namespace *user_ns;
        struct path path;
        struct dentry *parent;
        int err;
@@ -601,8 +604,9 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
                return err;
        }
 
+       user_ns = mnt_user_ns(path.mnt);
        parent = dget_parent(path.dentry);
-       err = ksmbd_vfs_lock_parent(parent, path.dentry);
+       err = ksmbd_vfs_lock_parent(user_ns, parent, path.dentry);
        if (err) {
                dput(parent);
                path_put(&path);
@@ -616,14 +620,12 @@ int ksmbd_vfs_remove_file(struct ksmbd_work *work, char *name)
        }
 
        if (S_ISDIR(d_inode(path.dentry)->i_mode)) {
-               err = vfs_rmdir(mnt_user_ns(path.mnt), d_inode(parent),
-                               path.dentry);
+               err = vfs_rmdir(user_ns, d_inode(parent), path.dentry);
                if (err && err != -ENOTEMPTY)
                        ksmbd_debug(VFS, "%s: rmdir failed, err %d\n", name,
                                    err);
        } else {
-               err = vfs_unlink(mnt_user_ns(path.mnt), d_inode(parent),
-                                path.dentry, NULL);
+               err = vfs_unlink(user_ns, d_inode(parent), path.dentry, NULL);
                if (err)
                        ksmbd_debug(VFS, "%s: unlink failed, err %d\n", name,
                                    err);
@@ -748,7 +750,8 @@ static int __ksmbd_vfs_rename(struct ksmbd_work *work,
        if (ksmbd_override_fsids(work))
                return -ENOMEM;
 
-       dst_dent = lookup_one_len(dst_name, dst_dent_parent, strlen(dst_name));
+       dst_dent = lookup_one(dst_user_ns, dst_name, dst_dent_parent,
+                             strlen(dst_name));
        err = PTR_ERR(dst_dent);
        if (IS_ERR(dst_dent)) {
                pr_err("lookup failed %s [%d]\n", dst_name, err);
@@ -779,6 +782,7 @@ out:
 int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
                        char *newname)
 {
+       struct user_namespace *user_ns;
        struct path dst_path;
        struct dentry *src_dent_parent, *dst_dent_parent;
        struct dentry *src_dent, *trap_dent, *src_child;
@@ -808,8 +812,9 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
        trap_dent = lock_rename(src_dent_parent, dst_dent_parent);
        dget(src_dent);
        dget(dst_dent_parent);
-       src_child = lookup_one_len(src_dent->d_name.name, src_dent_parent,
-                                  src_dent->d_name.len);
+       user_ns = file_mnt_user_ns(fp->filp);
+       src_child = lookup_one(user_ns, src_dent->d_name.name, src_dent_parent,
+                              src_dent->d_name.len);
        if (IS_ERR(src_child)) {
                err = PTR_ERR(src_child);
                goto out_lock;
@@ -823,7 +828,7 @@ int ksmbd_vfs_fp_rename(struct ksmbd_work *work, struct ksmbd_file *fp,
        dput(src_child);
 
        err = __ksmbd_vfs_rename(work,
-                                file_mnt_user_ns(fp->filp),
+                                user_ns,
                                 src_dent_parent,
                                 src_dent,
                                 mnt_user_ns(dst_path.mnt),
@@ -1109,7 +1114,7 @@ int ksmbd_vfs_unlink(struct user_namespace *user_ns,
 {
        int err = 0;
 
-       err = ksmbd_vfs_lock_parent(dir, dentry);
+       err = ksmbd_vfs_lock_parent(user_ns, dir, dentry);
        if (err)
                return err;
        dget(dentry);
@@ -1385,14 +1390,14 @@ static struct xattr_smb_acl *ksmbd_vfs_make_xattr_posix_acl(struct user_namespac
                switch (pa_entry->e_tag) {
                case ACL_USER:
                        xa_entry->type = SMB_ACL_USER;
-                       xa_entry->uid = from_kuid(user_ns, pa_entry->e_uid);
+                       xa_entry->uid = posix_acl_uid_translate(user_ns, pa_entry);
                        break;
                case ACL_USER_OBJ:
                        xa_entry->type = SMB_ACL_USER_OBJ;
                        break;
                case ACL_GROUP:
                        xa_entry->type = SMB_ACL_GROUP;
-                       xa_entry->gid = from_kgid(user_ns, pa_entry->e_gid);
+                       xa_entry->gid = posix_acl_gid_translate(user_ns, pa_entry);
                        break;
                case ACL_GROUP_OBJ:
                        xa_entry->type = SMB_ACL_GROUP_OBJ;
index cb0cba0..85db50a 100644 (file)
@@ -107,7 +107,8 @@ struct ksmbd_kstat {
        __le32                  file_attributes;
 };
 
-int ksmbd_vfs_lock_parent(struct dentry *parent, struct dentry *child);
+int ksmbd_vfs_lock_parent(struct user_namespace *user_ns, struct dentry *parent,
+                         struct dentry *child);
 int ksmbd_vfs_may_delete(struct user_namespace *user_ns, struct dentry *dentry);
 int ksmbd_vfs_query_maximal_access(struct user_namespace *user_ns,
                                   struct dentry *dentry, __le32 *daccess);
index 92d8c61..29c1db6 100644 (file)
@@ -666,22 +666,6 @@ void ksmbd_free_global_file_table(void)
        ksmbd_destroy_file_table(&global_ft);
 }
 
-int ksmbd_file_table_flush(struct ksmbd_work *work)
-{
-       struct ksmbd_file       *fp = NULL;
-       unsigned int            id;
-       int                     ret;
-
-       read_lock(&work->sess->file_table.lock);
-       idr_for_each_entry(work->sess->file_table.idr, fp, id) {
-               ret = ksmbd_vfs_fsync(work, fp->volatile_id, KSMBD_NO_FID);
-               if (ret)
-                       break;
-       }
-       read_unlock(&work->sess->file_table.lock);
-       return ret;
-}
-
 int ksmbd_init_file_table(struct ksmbd_file_table *ft)
 {
        ft->idr = kzalloc(sizeof(struct idr), GFP_KERNEL);
index 70dfe6a..448576f 100644 (file)
@@ -152,7 +152,6 @@ void ksmbd_close_session_fds(struct ksmbd_work *work);
 int ksmbd_close_inode_fds(struct ksmbd_work *work, struct inode *inode);
 int ksmbd_init_global_file_table(void);
 void ksmbd_free_global_file_table(void);
-int ksmbd_file_table_flush(struct ksmbd_work *work);
 void ksmbd_set_fd_limit(unsigned long limit);
 
 /*
index 95006d1..fa1d991 100644 (file)
@@ -531,6 +531,7 @@ static int fsnotify_attach_connector_to_object(fsnotify_connp_t *connp,
                /* Someone else created list structure for us */
                if (inode)
                        fsnotify_put_inode_ref(inode);
+               fsnotify_put_sb_connectors(conn);
                kmem_cache_free(fsnotify_mark_connector_cachep, conn);
        }
 
index 9a86d3e..c4e0cd1 100644 (file)
@@ -330,6 +330,15 @@ xfs_set_inode_alloc(
        return xfs_is_inode32(mp) ? maxagi : agcount;
 }
 
+static bool
+xfs_buftarg_is_dax(
+       struct super_block      *sb,
+       struct xfs_buftarg      *bt)
+{
+       return dax_supported(bt->bt_daxdev, bt->bt_bdev, sb->s_blocksize, 0,
+                       bdev_nr_sectors(bt->bt_bdev));
+}
+
 STATIC int
 xfs_blkdev_get(
        xfs_mount_t             *mp,
@@ -1588,11 +1597,10 @@ xfs_fs_fill_super(
                xfs_warn(mp,
                "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
 
-               datadev_is_dax = bdev_dax_supported(mp->m_ddev_targp->bt_bdev,
-                       sb->s_blocksize);
+               datadev_is_dax = xfs_buftarg_is_dax(sb, mp->m_ddev_targp);
                if (mp->m_rtdev_targp)
-                       rtdev_is_dax = bdev_dax_supported(
-                               mp->m_rtdev_targp->bt_bdev, sb->s_blocksize);
+                       rtdev_is_dax = xfs_buftarg_is_dax(sb,
+                                               mp->m_rtdev_targp);
                if (!rtdev_is_dax && !datadev_is_dax) {
                        xfs_alert(mp,
                        "DAX unsupported by block device. Turning off DAX.");
index cd905b4..13f5aa6 100644 (file)
 /*
  * If the divisor happens to be constant, we determine the appropriate
  * inverse at compile time to turn the division into a few inline
- * multiplications which ought to be much faster. And yet only if compiling
- * with a sufficiently recent gcc version to perform proper 64-bit constant
- * propagation.
+ * multiplications which ought to be much faster.
  *
  * (It is unfortunate that gcc doesn't perform all this internally.)
  */
 
-#ifndef __div64_const32_is_OK
-#define __div64_const32_is_OK (__GNUC__ >= 4)
-#endif
-
 #define __div64_const32(n, ___b)                                       \
 ({                                                                     \
        /*                                                              \
@@ -230,8 +224,7 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
            is_power_of_2(__base)) {                    \
                __rem = (n) & (__base - 1);             \
                (n) >>= ilog2(__base);                  \
-       } else if (__div64_const32_is_OK &&             \
-                  __builtin_constant_p(__base) &&      \
+       } else if (__builtin_constant_p(__base) &&      \
                   __base != 0) {                       \
                uint32_t __res_lo, __n_lo = (n);        \
                (n) = __div64_const32(n, __base);       \
@@ -241,8 +234,9 @@ extern uint32_t __div64_32(uint64_t *dividend, uint32_t divisor);
        } else if (likely(((n) >> 32) == 0)) {          \
                __rem = (uint32_t)(n) % __base;         \
                (n) = (uint32_t)(n) / __base;           \
-       } else                                          \
+       } else {                                        \
                __rem = __div64_32(&(n), __base);       \
+       }                                               \
        __rem;                                          \
  })
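
Whichever branch is taken above, the contract of do_div() stays the same:
the 64-bit dividend is replaced in place by the quotient and the 32-bit
remainder is returned. A small usage sketch:

    u64 ns = 1000000123ULL;
    u32 rem;

    rem = do_div(ns, 1000000);      /* ns is now 1000, rem is 123 */
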
 
index 818680c..b20e89d 100644 (file)
 #ifndef _TTM_TT_H_
 #define _TTM_TT_H_
 
+#include <linux/pagemap.h>
 #include <linux/types.h>
 #include <drm/ttm/ttm_caching.h>
 #include <drm/ttm/ttm_kmap_iter.h>
 
-struct ttm_bo_device;
+struct ttm_device;
 struct ttm_tt;
 struct ttm_resource;
 struct ttm_buffer_object;
index b52f084..2619d94 100644 (file)
@@ -41,7 +41,6 @@ struct dax_operations {
 extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
-struct dax_device *dax_get_by_host(const char *host);
 struct dax_device *alloc_dax(void *private, const char *host,
                const struct dax_operations *ops, unsigned long flags);
 void put_dax(struct dax_device *dax_dev);
@@ -58,8 +57,6 @@ static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
        __set_dax_synchronous(dax_dev);
 }
-bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
-               int blocksize, sector_t start, sector_t len);
 /*
  * Check if given mapping is supported by the file / underlying device.
  */
@@ -73,10 +70,6 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
        return dax_synchronous(dax_dev);
 }
 #else
-static inline struct dax_device *dax_get_by_host(const char *host)
-{
-       return NULL;
-}
 static inline struct dax_device *alloc_dax(void *private, const char *host,
                const struct dax_operations *ops, unsigned long flags)
 {
@@ -106,12 +99,6 @@ static inline bool dax_synchronous(struct dax_device *dax_dev)
 static inline void set_dax_synchronous(struct dax_device *dax_dev)
 {
 }
-static inline bool dax_supported(struct dax_device *dax_dev,
-               struct block_device *bdev, int blocksize, sector_t start,
-               sector_t len)
-{
-       return false;
-}
 static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
                                struct dax_device *dax_dev)
 {
@@ -122,22 +109,12 @@ static inline bool daxdev_mapping_supported(struct vm_area_struct *vma,
 struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
-bool __bdev_dax_supported(struct block_device *bdev, int blocksize);
-static inline bool bdev_dax_supported(struct block_device *bdev, int blocksize)
-{
-       return __bdev_dax_supported(bdev, blocksize);
-}
-
-bool __generic_fsdax_supported(struct dax_device *dax_dev,
+bool generic_fsdax_supported(struct dax_device *dax_dev,
                struct block_device *bdev, int blocksize, sector_t start,
                sector_t sectors);
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
-               struct block_device *bdev, int blocksize, sector_t start,
-               sector_t sectors)
-{
-       return __generic_fsdax_supported(dax_dev, bdev, blocksize, start,
-                       sectors);
-}
+
+bool dax_supported(struct dax_device *dax_dev, struct block_device *bdev,
+               int blocksize, sector_t start, sector_t len);
 
 static inline void fs_put_dax(struct dax_device *dax_dev)
 {
@@ -153,15 +130,11 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st
 dax_entry_t dax_lock_page(struct page *page);
 void dax_unlock_page(struct page *page, dax_entry_t cookie);
 #else
-static inline bool bdev_dax_supported(struct block_device *bdev,
-               int blocksize)
-{
-       return false;
-}
+#define generic_fsdax_supported                NULL
 
-static inline bool generic_fsdax_supported(struct dax_device *dax_dev,
+static inline bool dax_supported(struct dax_device *dax_dev,
                struct block_device *bdev, int blocksize, sector_t start,
-               sector_t sectors)
+               sector_t len)
 {
        return false;
 }
index 93c3ca5..e5c2c9e 100644 (file)
@@ -380,6 +380,7 @@ enum dma_slave_buswidth {
        DMA_SLAVE_BUSWIDTH_16_BYTES = 16,
        DMA_SLAVE_BUSWIDTH_32_BYTES = 32,
        DMA_SLAVE_BUSWIDTH_64_BYTES = 64,
+       DMA_SLAVE_BUSWIDTH_128_BYTES = 128,
 };
 
 /**
@@ -398,7 +399,7 @@ enum dma_slave_buswidth {
  * @src_addr_width: this is the width in bytes of the source (RX)
  * register where DMA data shall be read. If the source
  * is memory this may be ignored depending on architecture.
- * Legal values: 1, 2, 3, 4, 8, 16, 32, 64.
+ * Legal values: 1, 2, 3, 4, 8, 16, 32, 64, 128.
  * @dst_addr_width: same as src_addr_width but for destination
  * target (TX) mutatis mutandis.
  * @src_maxburst: the maximum number of words (note: words, as in
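
A hedged example of a client driver requesting the new 128-byte width; the
channel pointer and the FIFO address are placeholders, not taken from this
change:

    struct dma_slave_config cfg = {
            .direction      = DMA_DEV_TO_MEM,
            .src_addr       = fifo_addr,                    /* placeholder */
            .src_addr_width = DMA_SLAVE_BUSWIDTH_128_BYTES,
            .src_maxburst   = 4,
    };

    ret = dmaengine_slave_config(chan, &cfg);               /* chan, ret: placeholders */
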
index 593322c..3337745 100644 (file)
@@ -68,4 +68,22 @@ static inline void eventpoll_release(struct file *file) {}
 
 #endif
 
+#if defined(CONFIG_ARM) && defined(CONFIG_OABI_COMPAT)
+/* ARM OABI has an incompatible struct layout and needs a special handler */
+extern struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+                struct epoll_event __user *uevent);
+#else
+static inline struct epoll_event __user *
+epoll_put_uevent(__poll_t revents, __u64 data,
+                struct epoll_event __user *uevent)
+{
+       if (__put_user(revents, &uevent->events) ||
+           __put_user(data, &uevent->data))
+               return NULL;
+
+       return uevent+1;
+}
+#endif
+
 #endif /* #ifndef _LINUX_EVENTPOLL_H */
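
A sketch of the assumed caller shape (not shown in this hunk): the event
delivery loop copies one ready event, advances the user-space pointer, and
treats a NULL return as a fault:

    uevent = epoll_put_uevent(revents, epi->event.data, uevent);
    if (!uevent)
            return -EFAULT;         /* __put_user() faulted */
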
index 37ad9a7..e7a6333 100644 (file)
@@ -3439,6 +3439,8 @@ extern int buffer_migrate_page_norefs(struct address_space *,
 #define buffer_migrate_page_norefs NULL
 #endif
 
+int may_setattr(struct user_namespace *mnt_userns, struct inode *inode,
+               unsigned int ia_valid);
 int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
 void setattr_copy(struct user_namespace *, struct inode *inode,
@@ -3592,7 +3594,7 @@ int proc_nr_dentry(struct ctl_table *table, int write,
                  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_inodes(struct ctl_table *table, int write,
                   void *buffer, size_t *lenp, loff_t *ppos);
-int __init get_filesystem_list(char *buf);
+int __init list_bdev_fs_names(char *buf, size_t size);
 
 #define __FMODE_EXEC           ((__force int) FMODE_EXEC)
 #define __FMODE_NONOTIFY       ((__force int) FMODE_NONOTIFY)
index b34a094..860ba4b 100644 (file)
@@ -41,36 +41,39 @@ struct dw_dma_slave {
 
 /**
  * struct dw_dma_platform_data - Controller configuration parameters
+ * @nr_masters: Number of AHB masters supported by the controller
  * @nr_channels: Number of channels supported by hardware (max 8)
  * @chan_allocation_order: Allocate channels starting from 0 or 7
  * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0.
  * @block_size: Maximum block size supported by the controller
- * @nr_masters: Number of AHB masters supported by the controller
  * @data_width: Maximum data width supported by hardware per AHB master
  *             (in bytes, power of 2)
  * @multi_block: Multi block transfers supported by hardware per channel.
  * @max_burst: Maximum value of burst transaction size supported by hardware
  *            per channel (in units of CTL.SRC_TR_WIDTH/CTL.DST_TR_WIDTH).
  * @protctl: Protection control signals setting per channel.
+ * @quirks: Optional platform quirks.
  */
 struct dw_dma_platform_data {
-       unsigned int    nr_channels;
+       u32             nr_masters;
+       u32             nr_channels;
 #define CHAN_ALLOCATION_ASCENDING      0       /* zero to seven */
 #define CHAN_ALLOCATION_DESCENDING     1       /* seven to zero */
-       unsigned char   chan_allocation_order;
+       u32             chan_allocation_order;
 #define CHAN_PRIORITY_ASCENDING                0       /* chan0 highest */
 #define CHAN_PRIORITY_DESCENDING       1       /* chan7 highest */
-       unsigned char   chan_priority;
-       unsigned int    block_size;
-       unsigned char   nr_masters;
-       unsigned char   data_width[DW_DMA_MAX_NR_MASTERS];
-       unsigned char   multi_block[DW_DMA_MAX_NR_CHANNELS];
+       u32             chan_priority;
+       u32             block_size;
+       u32             data_width[DW_DMA_MAX_NR_MASTERS];
+       u32             multi_block[DW_DMA_MAX_NR_CHANNELS];
        u32             max_burst[DW_DMA_MAX_NR_CHANNELS];
 #define CHAN_PROTCTL_PRIVILEGED                BIT(0)
 #define CHAN_PROTCTL_BUFFERABLE                BIT(1)
 #define CHAN_PROTCTL_CACHEABLE         BIT(2)
 #define CHAN_PROTCTL_MASK              GENMASK(2, 0)
-       unsigned char   protctl;
+       u32             protctl;
+#define DW_DMA_QUIRK_XBAR_PRESENT      BIT(0)
+       u32             quirks;
 };
 
 #endif /* _PLATFORM_DATA_DMA_DW_H */
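
A hypothetical board-file fragment using the widened u32 fields and the new
quirks word; the values are illustrative only:

    static struct dw_dma_platform_data example_pdata = {
            .nr_masters            = 2,
            .nr_channels           = 8,
            .chan_allocation_order = CHAN_ALLOCATION_ASCENDING,
            .chan_priority         = CHAN_PRIORITY_ASCENDING,
            .block_size            = 4095,
            .data_width            = { 4, 4 },
            .quirks                = DW_DMA_QUIRK_XBAR_PRESENT,
    };
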
index 60a3ab0..252243c 100644 (file)
@@ -1373,6 +1373,9 @@ long ksys_old_shmctl(int shmid, int cmd, struct shmid_ds __user *buf);
 long compat_ksys_semtimedop(int semid, struct sembuf __user *tsems,
                            unsigned int nsops,
                            const struct old_timespec32 __user *timeout);
+long __do_semtimedop(int semid, struct sembuf *tsems, unsigned int nsops,
+                    const struct timespec64 *timeout,
+                    struct ipc_namespace *ns);
 
 int __sys_getsockopt(int fd, int level, int optname, char __user *optval,
                int __user *optlen);
index 82c3c3e..5265024 100644 (file)
@@ -47,6 +47,7 @@ struct iov_iter {
                };
                loff_t xarray_start;
        };
+       size_t truncated;
 };
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
@@ -254,8 +255,10 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
         * conversion in assignment is by definition greater than all
         * values of size_t, including old i->count.
         */
-       if (i->count > count)
+       if (i->count > count) {
+               i->truncated += i->count - count;
                i->count = count;
+       }
 }
 
 /*
@@ -264,6 +267,7 @@ static inline void iov_iter_truncate(struct iov_iter *i, u64 count)
  */
 static inline void iov_iter_reexpand(struct iov_iter *i, size_t count)
 {
+       i->truncated -= count - i->count;
        i->count = count;
 }
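
The new counter simply accounts for the usual truncate/reexpand pairing. A
sketch of that pairing, assuming an already-initialized iterator and an I/O
size limit:

    size_t old_count = iov_iter_count(iter);

    iov_iter_truncate(iter, limit);         /* i->truncated grows by the cut */
    /* ... perform the size-limited transfer ... */
    iov_iter_reexpand(iter, old_count);     /* i->truncated shrinks back */
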
 
index f6e8a00..8d206f2 100644 (file)
@@ -50,7 +50,7 @@ enum { CXL_CMDS };
 #define ___C(a, b) { b }
 static const struct {
        const char *name;
-} cxl_command_names[] = { CXL_CMDS };
+} cxl_command_names[] __attribute__((__unused__)) = { CXL_CMDS };
 
 /*
  * Here's how this actually breaks out:
index edc346a..c750eac 100644 (file)
@@ -9,6 +9,30 @@
 #include <stdint.h>
 #endif
 
+/* Driver command error status */
+enum idxd_scmd_stat {
+       IDXD_SCMD_DEV_ENABLED = 0x80000010,
+       IDXD_SCMD_DEV_NOT_ENABLED = 0x80000020,
+       IDXD_SCMD_WQ_ENABLED = 0x80000021,
+       IDXD_SCMD_DEV_DMA_ERR = 0x80020000,
+       IDXD_SCMD_WQ_NO_GRP = 0x80030000,
+       IDXD_SCMD_WQ_NO_NAME = 0x80040000,
+       IDXD_SCMD_WQ_NO_SVM = 0x80050000,
+       IDXD_SCMD_WQ_NO_THRESH = 0x80060000,
+       IDXD_SCMD_WQ_PORTAL_ERR = 0x80070000,
+       IDXD_SCMD_WQ_RES_ALLOC_ERR = 0x80080000,
+       IDXD_SCMD_PERCPU_ERR = 0x80090000,
+       IDXD_SCMD_DMA_CHAN_ERR = 0x800a0000,
+       IDXD_SCMD_CDEV_ERR = 0x800b0000,
+       IDXD_SCMD_WQ_NO_SWQ_SUPPORT = 0x800c0000,
+       IDXD_SCMD_WQ_NONE_CONFIGURED = 0x800d0000,
+       IDXD_SCMD_WQ_NO_SIZE = 0x800e0000,
+       IDXD_SCMD_WQ_NO_PRIV = 0x800f0000,
+};
+
+#define IDXD_SCMD_SOFTERR_MASK 0x80000000
+#define IDXD_SCMD_SOFTERR_SHIFT        16
+
 /* Descriptor flags */
 #define IDXD_OP_FLAG_FENCE     0x0001
 #define IDXD_OP_FLAG_BOF       0x0002
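
All of the status codes above carry the high bit, which is what
IDXD_SCMD_SOFTERR_MASK tests for. A hedged user-space sketch (helper name is
illustrative) for classifying a status value read back from the driver:

    #include <stdint.h>

    /* Assumption: a set high bit marks a driver-defined soft error code. */
    static int idxd_status_is_soft_error(uint32_t status)
    {
            return (status & IDXD_SCMD_SOFTERR_MASK) != 0;
    }
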
index 89daa88..668b07c 100644 (file)
@@ -9,13 +9,14 @@
 
 /**
  * enum virtio_pcidev_ops - virtual PCI device operations
+ * @VIRTIO_PCIDEV_OP_RESERVED: reserved to catch errors
  * @VIRTIO_PCIDEV_OP_CFG_READ: read config space, size is 1, 2, 4 or 8;
  *     the @data field should be filled in by the device (in little endian).
  * @VIRTIO_PCIDEV_OP_CFG_WRITE: write config space, size is 1, 2, 4 or 8;
  *     the @data field contains the data to write (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_READ: read BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_READ: read BAR mem/pio, size can be variable;
  *     the @data field should be filled in by the device (in little endian).
- * @VIRTIO_PCIDEV_OP_BAR_WRITE: write BAR mem/pio, size can be variable;
+ * @VIRTIO_PCIDEV_OP_MMIO_WRITE: write BAR mem/pio, size can be variable;
  *     the @data field contains the data to write (in little endian).
  * @VIRTIO_PCIDEV_OP_MMIO_MEMSET: memset MMIO, size is variable but
  *     the @data field only has one byte (unlike @VIRTIO_PCIDEV_OP_MMIO_WRITE)
index a47a731..7cc2a0f 100644 (file)
@@ -276,7 +276,17 @@ enum hl_device_status {
        HL_DEVICE_STATUS_OPERATIONAL,
        HL_DEVICE_STATUS_IN_RESET,
        HL_DEVICE_STATUS_MALFUNCTION,
-       HL_DEVICE_STATUS_NEEDS_RESET
+       HL_DEVICE_STATUS_NEEDS_RESET,
+       HL_DEVICE_STATUS_IN_DEVICE_CREATION,
+       HL_DEVICE_STATUS_LAST = HL_DEVICE_STATUS_IN_DEVICE_CREATION
+};
+
+enum hl_server_type {
+       HL_SERVER_TYPE_UNKNOWN = 0,
+       HL_SERVER_GAUDI_HLS1 = 1,
+       HL_SERVER_GAUDI_HLS1H = 2,
+       HL_SERVER_GAUDI_TYPE1 = 3,
+       HL_SERVER_GAUDI_TYPE2 = 4
 };
 
 /* Opcode for management ioctl
@@ -337,17 +347,49 @@ enum hl_device_status {
 #define HL_INFO_VERSION_MAX_LEN        128
 #define HL_INFO_CARD_NAME_MAX_LEN      16
 
+/**
+ * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC
+ * @sram_base_address: The first SRAM physical base address that is free to be
+ *                     used by the user.
+ * @dram_base_address: The first DRAM virtual or physical base address that is
+ *                     free to be used by the user.
+ * @dram_size: The DRAM size that is available to the user.
+ * @sram_size: The SRAM size that is available to the user.
+ * @num_of_events: The number of events that can be received from the f/w. This
+ *                 is needed so the user knows the size of the h/w events
+ *                 array that needs to be passed to the kernel when fetching
+ *                 the event counters.
+ * @device_id: PCI device ID of the ASIC.
+ * @module_id: Module ID of the ASIC for mezzanine cards in servers
+ *             (From OCP spec).
+ * @first_available_interrupt_id: The first available interrupt ID for the user
+ *                                to be used when it works with user interrupts.
+ * @server_type: Server type that the Gaudi ASIC is currently installed in.
+ *               The value is according to enum hl_server_type
+ * @cpld_version: CPLD version on the board.
+ * @psoc_pci_pll_nr: PCI PLL NR value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_nf: PCI PLL NF value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_od: PCI PLL OD value. Needed by the profiler in some ASICs.
+ * @psoc_pci_pll_div_factor: PCI PLL DIV factor value. Needed by the profiler
+ *                           in some ASICs.
+ * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant
+ *                    for Goya/Gaudi only.
+ * @dram_enabled: Whether the DRAM is enabled.
+ * @cpucp_version: The CPUCP f/w version.
+ * @card_name: The card name as passed by the f/w.
+ * @dram_page_size: The DRAM physical page size.
+ */
 struct hl_info_hw_ip_info {
        __u64 sram_base_address;
        __u64 dram_base_address;
        __u64 dram_size;
        __u32 sram_size;
        __u32 num_of_events;
-       __u32 device_id; /* PCI Device ID */
-       __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
+       __u32 device_id;
+       __u32 module_id;
        __u32 reserved;
        __u16 first_available_interrupt_id;
-       __u16 reserved2;
+       __u16 server_type;
        __u32 cpld_version;
        __u32 psoc_pci_pll_nr;
        __u32 psoc_pci_pll_nf;
@@ -358,7 +400,7 @@ struct hl_info_hw_ip_info {
        __u8 pad[2];
        __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
        __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
-       __u64 reserved3;
+       __u64 reserved2;
        __u64 dram_page_size;
 };
 
@@ -628,12 +670,21 @@ struct hl_cs_chunk {
                __u64 cb_handle;
 
                /* Relevant only when HL_CS_FLAGS_WAIT or
-                * HL_CS_FLAGS_COLLECTIVE_WAIT is set.
+                * HL_CS_FLAGS_COLLECTIVE_WAIT is set
                 * This holds address of array of u64 values that contain
-                * signal CS sequence numbers. The wait described by this job
-                * will listen on all those signals (wait event per signal)
+                * signal CS sequence numbers. The wait described by
+                * this job will listen on all those signals
+                * (wait event per signal)
                 */
                __u64 signal_seq_arr;
+
+               /*
+                * Relevant only when HL_CS_FLAGS_WAIT or
+                * HL_CS_FLAGS_COLLECTIVE_WAIT is set
+                * along with HL_CS_FLAGS_ENCAP_SIGNALS.
+                * This is the CS sequence which has the encapsulated signals.
+                */
+               __u64 encaps_signal_seq;
        };
 
        /* Index of queue to put the CB on */
@@ -651,6 +702,17 @@ struct hl_cs_chunk {
                 * Number of entries in signal_seq_arr
                 */
                __u32 num_signal_seq_arr;
+
+               /* Relevant only when HL_CS_FLAGS_WAIT or
+                * HL_CS_FLAGS_COLLECTIVE_WAIT is set along
+                * with HL_CS_FLAGS_ENCAP_SIGNALS
+                * This sets the signals range that the user wants to wait for
+                * out of the whole reserved signals range.
+                * e.g. if the signals range is 20 and the user does not want
+                * to wait for signal 8, the offset is set to 7, and the API
+                * is called again with 9 and so on up to 20.
+                */
+               __u32 encaps_signal_offset;
        };
 
        /* HL_CS_CHUNK_FLAGS_* */
@@ -678,6 +740,28 @@ struct hl_cs_chunk {
 #define HL_CS_FLAGS_CUSTOM_TIMEOUT             0x200
 #define HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT      0x400
 
+/*
+ * The encapsulated signals CS is merged into the existing CS ioctls.
+ * In order to use this feature, follow the procedure below:
+ * 1. Reserve signals: set the CS type to HL_CS_FLAGS_RESERVE_SIGNALS_ONLY;
+ *    the output of this API will be the SOB offset from CFG_BASE.
+ *    This address will be used to patch CB cmds to do the signaling for this
+ *    SOB by incrementing its value.
+ *    To revert the reservation, use the HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY
+ *    CS type. Note that this might fail if the SOB value went out of sync,
+ *    i.e. another signaling request to the same SOB occurred between the
+ *    reserve and unreserve calls.
+ * 2. Use the staged CS to do the encapsulated signaling jobs.
+ *    Use HL_CS_FLAGS_STAGED_SUBMISSION and HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ *    along with the HL_CS_FLAGS_ENCAP_SIGNALS flag, and set the
+ *    encaps_signal_offset field. This offset allows the app to wait on part
+ *    of the reserved signals.
+ * 3. Use WAIT/COLLECTIVE WAIT CS along with HL_CS_FLAGS_ENCAP_SIGNALS flag
+ *    to wait for the encapsulated signals.
+ */
+#define HL_CS_FLAGS_ENCAP_SIGNALS              0x800
+#define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY       0x1000
+#define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY     0x2000
+
 #define HL_CS_STATUS_SUCCESS           0
 
 #define HL_MAX_JOBS_PER_CS             512
@@ -690,10 +774,35 @@ struct hl_cs_in {
        /* holds address of array of hl_cs_chunk for execution phase */
        __u64 chunks_execute;
 
-       /* Sequence number of a staged submission CS
-        * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
-        */
-       __u64 seq;
+       union {
+               /*
+                * Sequence number of a staged submission CS
+                * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set and
+                * HL_CS_FLAGS_STAGED_SUBMISSION_FIRST is unset.
+                */
+               __u64 seq;
+
+               /*
+                * Encapsulated signals handle id
+                * Valid for two flows:
+                * 1. CS with encapsulated signals:
+                *    when HL_CS_FLAGS_STAGED_SUBMISSION and
+                *    HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+                *    and HL_CS_FLAGS_ENCAP_SIGNALS are set.
+                * 2. unreserve signals:
+                *    valid when HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY is set.
+                */
+               __u32 encaps_sig_handle_id;
+
+               /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+               struct {
+                       /* Encapsulated signals number */
+                       __u32 encaps_signals_count;
+
+                       /* Encapsulated signals queue index (stream) */
+                       __u32 encaps_signals_q_idx;
+               };
+       };
 
        /* Number of chunks in restore phase array. Maximum number is
         * HL_MAX_JOBS_PER_CS
@@ -718,14 +827,31 @@ struct hl_cs_in {
 };
 
 struct hl_cs_out {
+       union {
+               /*
+                * seq holds the sequence number of the CS to pass to wait
+                * ioctl. All values are valid except for 0 and ULLONG_MAX
+                */
+               __u64 seq;
+
+               /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */
+               struct {
+                       /* This is the reserved signal handle id */
+                       __u32 handle_id;
+
+                       /* This is the signals count */
+                       __u32 count;
+               };
+       };
+
+       /* HL_CS_STATUS */
+       __u32 status;
+
        /*
-        * seq holds the sequence number of the CS to pass to wait ioctl. All
-        * values are valid except for 0 and ULLONG_MAX
+        * SOB base address offset
+        * Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set
         */
-       __u64 seq;
-       /* HL_CS_STATUS_* */
-       __u32 status;
-       __u32 pad;
+       __u32 sob_base_addr_offset;
 };
 
 union hl_cs_args {
@@ -735,11 +861,18 @@ union hl_cs_args {
 
 #define HL_WAIT_CS_FLAGS_INTERRUPT     0x2
 #define HL_WAIT_CS_FLAGS_INTERRUPT_MASK 0xFFF00000
+#define HL_WAIT_CS_FLAGS_MULTI_CS      0x4
+
+#define HL_WAIT_MULTI_CS_LIST_MAX_LEN  32
 
 struct hl_wait_cs_in {
        union {
                struct {
-                       /* Command submission sequence number */
+                       /*
+                        * In case of wait_cs, this holds the CS sequence
+                        * number. In case of wait for multi CS, this holds a
+                        * user pointer to an array of CS sequence numbers.
+                        */
                        __u64 seq;
                        /* Absolute timeout to wait for command submission
                         * in microseconds
@@ -767,12 +900,17 @@ struct hl_wait_cs_in {
 
        /* Context ID - Currently not in use */
        __u32 ctx_id;
+
        /* HL_WAIT_CS_FLAGS_*
         * If HL_WAIT_CS_FLAGS_INTERRUPT is set, this field should include
         * interrupt id according to HL_WAIT_CS_FLAGS_INTERRUPT_MASK, in order
         * not to specify an interrupt id, set the mask to all 1s.
         */
        __u32 flags;
+
+       /* Multi CS API info- valid entries in multi-CS array */
+       __u8 seq_arr_len;
+       __u8 pad[7];
 };
 
 #define HL_WAIT_CS_STATUS_COMPLETED    0
@@ -789,8 +927,15 @@ struct hl_wait_cs_out {
        __u32 status;
        /* HL_WAIT_CS_STATUS_FLAG* */
        __u32 flags;
-       /* valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set */
+       /*
+        * valid only if HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD is set
+        * for wait_cs: timestamp of CS completion
+        * for wait_multi_cs: timestamp of FIRST CS completion
+        */
        __s64 timestamp_nsec;
+       /* multi CS completion bitmap */
+       __u32 cs_completion_map;
+       __u32 pad;
 };
 
 union hl_wait_cs_args {
@@ -813,6 +958,7 @@ union hl_wait_cs_args {
 #define HL_MEM_CONTIGUOUS      0x1
 #define HL_MEM_SHARED          0x2
 #define HL_MEM_USERPTR         0x4
+#define HL_MEM_FORCE_HINT      0x8
 
 struct hl_mem_in {
        union {
index b691d68..2ed30ff 100644 (file)
@@ -338,31 +338,22 @@ __setup("rootflags=", root_data_setup);
 __setup("rootfstype=", fs_names_setup);
 __setup("rootdelay=", root_delay_setup);
 
-static void __init get_fs_names(char *page)
+static int __init split_fs_names(char *page, char *names)
 {
-       char *s = page;
+       int count = 0;
+       char *p = page;
 
-       if (root_fs_names) {
-               strcpy(page, root_fs_names);
-               while (*s++) {
-                       if (s[-1] == ',')
-                               s[-1] = '\0';
-               }
-       } else {
-               int len = get_filesystem_list(page);
-               char *p, *next;
-
-               page[len] = '\0';
-               for (p = page-1; p; p = next) {
-                       next = strchr(++p, '\n');
-                       if (*p++ != '\t')
-                               continue;
-                       while ((*s++ = *p++) != '\n')
-                               ;
-                       s[-1] = '\0';
-               }
+       strcpy(p, root_fs_names);
+       while (*p++) {
+               if (p[-1] == ',')
+                       p[-1] = '\0';
        }
-       *s = '\0';
+       *p = '\0';
+
+       for (p = page; *p; p += strlen(p)+1)
+               count++;
+
+       return count;
 }
 
 static int __init do_mount_root(const char *name, const char *fs,
@@ -408,12 +399,16 @@ void __init mount_block_root(char *name, int flags)
        char *fs_names = page_address(page);
        char *p;
        char b[BDEVNAME_SIZE];
+       int num_fs, i;
 
        scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
                  MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
-       get_fs_names(fs_names);
+       if (root_fs_names)
+               num_fs = split_fs_names(fs_names, root_fs_names);
+       else
+               num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
 retry:
-       for (p = fs_names; *p; p += strlen(p)+1) {
+       for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1) {
                int err = do_mount_root(name, p, flags, root_mount_data);
                switch (err) {
                        case 0:
@@ -442,7 +437,7 @@ retry:
        printk("List of all partitions:\n");
        printk_all_partitions();
        printk("No filesystem could mount root, tried: ");
-       for (p = fs_names; *p; p += strlen(p)+1)
+       for (i = 0, p = fs_names; i < num_fs; i++, p += strlen(p)+1)
                printk(" %s", p);
        printk("\n");
        panic("VFS: Unable to mount root fs on %s", b);
@@ -526,6 +521,47 @@ static int __init mount_cifs_root(void)
 }
 #endif
 
+static bool __init fs_is_nodev(char *fstype)
+{
+       struct file_system_type *fs = get_fs_type(fstype);
+       bool ret = false;
+
+       if (fs) {
+               ret = !(fs->fs_flags & FS_REQUIRES_DEV);
+               put_filesystem(fs);
+       }
+
+       return ret;
+}
+
+static int __init mount_nodev_root(void)
+{
+       char *fs_names, *fstype;
+       int err = -EINVAL;
+       int num_fs, i;
+
+       fs_names = (void *)__get_free_page(GFP_KERNEL);
+       if (!fs_names)
+               return -EINVAL;
+       num_fs = split_fs_names(fs_names, root_fs_names);
+
+       for (i = 0, fstype = fs_names; i < num_fs;
+            i++, fstype += strlen(fstype) + 1) {
+               if (!fs_is_nodev(fstype))
+                       continue;
+               err = do_mount_root(root_device_name, fstype, root_mountflags,
+                                   root_mount_data);
+               if (!err)
+                       break;
+               if (err != -EACCES && err != -EINVAL)
+                       panic("VFS: Unable to mount root \"%s\" (%s), err=%d\n",
+                             root_device_name, fstype, err);
+       }
+
+       free_page((unsigned long)fs_names);
+       return err;
+}
+
 void __init mount_root(void)
 {
 #ifdef CONFIG_ROOT_NFS
@@ -542,6 +578,10 @@ void __init mount_root(void)
                return;
        }
 #endif
+       if (ROOT_DEV == 0 && root_device_name && root_fs_names) {
+               if (mount_nodev_root() == 0)
+                       return;
+       }
 #ifdef CONFIG_BLOCK
        {
                int err = create_dev("/dev/root", ROOT_DEV);
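
For illustration, a stand-alone user-space model of the comma splitting that
split_fs_names() performs on a rootfstype= list such as "ext4,xfs"; the page
buffer is assumed to be large enough for the list plus two NUL bytes:

    #include <string.h>

    static int split_fs_names_demo(char *page, const char *names)
    {
            int count = 0;
            char *p = page;

            strcpy(p, names);               /* "ext4,xfs"            */
            while (*p++)
                    if (p[-1] == ',')
                            p[-1] = '\0';   /* "ext4\0xfs\0"         */
            *p = '\0';                      /* double-NUL terminate  */

            for (p = page; *p; p += strlen(p) + 1)
                    count++;                /* count == 2            */

            return count;
    }
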
index 733e147..5c9a48d 100644 (file)
@@ -153,10 +153,10 @@ static char *extra_init_args;
 #ifdef CONFIG_BOOT_CONFIG
 /* Is bootconfig on command line? */
 static bool bootconfig_found;
-static bool initargs_found;
+static size_t initargs_offs;
 #else
 # define bootconfig_found false
-# define initargs_found false
+# define initargs_offs 0
 #endif
 
 static char *execute_command;
@@ -422,9 +422,9 @@ static void __init setup_boot_config(void)
        if (IS_ERR(err) || !bootconfig_found)
                return;
 
-       /* parse_args() stops at '--' and returns an address */
+       /* parse_args() stops at the param right after '--' and returns its address */
        if (err)
-               initargs_found = true;
+               initargs_offs = err - tmp_cmdline;
 
        if (!data) {
                pr_err("'bootconfig' found on command line, but no bootconfig found\n");
@@ -468,7 +468,12 @@ static void __init setup_boot_config(void)
        return;
 }
 
-#else
+static void __init exit_boot_config(void)
+{
+       xbc_destroy_all();
+}
+
+#else  /* !CONFIG_BOOT_CONFIG */
 
 static void __init setup_boot_config(void)
 {
@@ -481,7 +486,11 @@ static int __init warn_bootconfig(char *str)
        pr_warn("WARNING: 'bootconfig' found on the kernel command line but CONFIG_BOOT_CONFIG is not set.\n");
        return 0;
 }
-#endif
+
+#define exit_boot_config()     do {} while (0)
+
+#endif /* CONFIG_BOOT_CONFIG */
+
 early_param("bootconfig", warn_bootconfig);
 
 /* Change NUL term back to "=", to make "param" the whole string. */
@@ -646,16 +655,21 @@ static void __init setup_command_line(char *command_line)
                 * Append supplemental init boot args to saved_command_line
                 * so that user can check what command line options passed
                 * to init.
+                * The order should always be
+                * " -- "[bootconfig init-param][cmdline init-param]
                 */
-               len = strlen(saved_command_line);
-               if (initargs_found) {
-                       saved_command_line[len++] = ' ';
+               if (initargs_offs) {
+                       len = xlen + initargs_offs;
+                       strcpy(saved_command_line + len, extra_init_args);
+                       len += ilen - 4;        /* strlen(extra_init_args) */
+                       strcpy(saved_command_line + len,
+                               boot_command_line + initargs_offs - 1);
                } else {
+                       len = strlen(saved_command_line);
                        strcpy(saved_command_line + len, " -- ");
                        len += 4;
+                       strcpy(saved_command_line + len, extra_init_args);
                }
-
-               strcpy(saved_command_line + len, extra_init_args);
        }
 }
 
@@ -1494,6 +1508,7 @@ static int __ref kernel_init(void *unused)
        kprobe_free_init_mem();
        ftrace_free_init_mem();
        kgdb_free_init_mem();
+       exit_boot_config();
        free_initmem();
        mark_readonly();
 
index 1a8b9f0..f833238 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -1984,47 +1984,34 @@ out:
        return un;
 }
 
-static long do_semtimedop(int semid, struct sembuf __user *tsops,
-               unsigned nsops, const struct timespec64 *timeout)
+long __do_semtimedop(int semid, struct sembuf *sops,
+               unsigned nsops, const struct timespec64 *timeout,
+               struct ipc_namespace *ns)
 {
        int error = -EINVAL;
        struct sem_array *sma;
-       struct sembuf fast_sops[SEMOPM_FAST];
-       struct sembuf *sops = fast_sops, *sop;
+       struct sembuf *sop;
        struct sem_undo *un;
        int max, locknum;
        bool undos = false, alter = false, dupsop = false;
        struct sem_queue queue;
        unsigned long dup = 0, jiffies_left = 0;
-       struct ipc_namespace *ns;
-
-       ns = current->nsproxy->ipc_ns;
 
        if (nsops < 1 || semid < 0)
                return -EINVAL;
        if (nsops > ns->sc_semopm)
                return -E2BIG;
-       if (nsops > SEMOPM_FAST) {
-               sops = kvmalloc_array(nsops, sizeof(*sops),
-                                     GFP_KERNEL_ACCOUNT);
-               if (sops == NULL)
-                       return -ENOMEM;
-       }
-
-       if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
-               error =  -EFAULT;
-               goto out_free;
-       }
 
        if (timeout) {
                if (timeout->tv_sec < 0 || timeout->tv_nsec < 0 ||
                        timeout->tv_nsec >= 1000000000L) {
                        error = -EINVAL;
-                       goto out_free;
+                       goto out;
                }
                jiffies_left = timespec64_to_jiffies(timeout);
        }
 
+
        max = 0;
        for (sop = sops; sop < sops + nsops; sop++) {
                unsigned long mask = 1ULL << ((sop->sem_num) % BITS_PER_LONG);
@@ -2053,7 +2040,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
                un = find_alloc_undo(ns, semid);
                if (IS_ERR(un)) {
                        error = PTR_ERR(un);
-                       goto out_free;
+                       goto out;
                }
        } else {
                un = NULL;
@@ -2064,25 +2051,25 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
        if (IS_ERR(sma)) {
                rcu_read_unlock();
                error = PTR_ERR(sma);
-               goto out_free;
+               goto out;
        }
 
        error = -EFBIG;
        if (max >= sma->sem_nsems) {
                rcu_read_unlock();
-               goto out_free;
+               goto out;
        }
 
        error = -EACCES;
        if (ipcperms(ns, &sma->sem_perm, alter ? S_IWUGO : S_IRUGO)) {
                rcu_read_unlock();
-               goto out_free;
+               goto out;
        }
 
        error = security_sem_semop(&sma->sem_perm, sops, nsops, alter);
        if (error) {
                rcu_read_unlock();
-               goto out_free;
+               goto out;
        }
 
        error = -EIDRM;
@@ -2096,7 +2083,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
         * entangled here and why it's RMID race safe on comments at sem_lock()
         */
        if (!ipc_valid_object(&sma->sem_perm))
-               goto out_unlock_free;
+               goto out_unlock;
        /*
         * semid identifiers are not unique - find_alloc_undo may have
         * allocated an undo structure, it was invalidated by an RMID
@@ -2105,7 +2092,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
         * "un" itself is guaranteed by rcu.
         */
        if (un && un->semid == -1)
-               goto out_unlock_free;
+               goto out_unlock;
 
        queue.sops = sops;
        queue.nsops = nsops;
@@ -2131,10 +2118,10 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
                rcu_read_unlock();
                wake_up_q(&wake_q);
 
-               goto out_free;
+               goto out;
        }
        if (error < 0) /* non-blocking error path */
-               goto out_unlock_free;
+               goto out_unlock;
 
        /*
         * We need to sleep on this operation, so we put the current
@@ -2199,14 +2186,14 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
                if (error != -EINTR) {
                        /* see SEM_BARRIER_2 for purpose/pairing */
                        smp_acquire__after_ctrl_dep();
-                       goto out_free;
+                       goto out;
                }
 
                rcu_read_lock();
                locknum = sem_lock(sma, sops, nsops);
 
                if (!ipc_valid_object(&sma->sem_perm))
-                       goto out_unlock_free;
+                       goto out_unlock;
 
                /*
                 * No necessity for any barrier: We are protect by sem_lock()
@@ -2218,7 +2205,7 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
                 * Leave without unlink_queue(), but with sem_unlock().
                 */
                if (error != -EINTR)
-                       goto out_unlock_free;
+                       goto out_unlock;
 
                /*
                 * If an interrupt occurred we have to clean up the queue.
@@ -2229,13 +2216,45 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 
        unlink_queue(sma, &queue);
 
-out_unlock_free:
+out_unlock:
        sem_unlock(sma, locknum);
        rcu_read_unlock();
+out:
+       return error;
+}
+
+static long do_semtimedop(int semid, struct sembuf __user *tsops,
+               unsigned nsops, const struct timespec64 *timeout)
+{
+       struct sembuf fast_sops[SEMOPM_FAST];
+       struct sembuf *sops = fast_sops;
+       struct ipc_namespace *ns;
+       int ret;
+
+       ns = current->nsproxy->ipc_ns;
+       if (nsops > ns->sc_semopm)
+               return -E2BIG;
+       if (nsops < 1)
+               return -EINVAL;
+
+       if (nsops > SEMOPM_FAST) {
+               sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL_ACCOUNT);
+               if (sops == NULL)
+                       return -ENOMEM;
+       }
+
+       if (copy_from_user(sops, tsops, nsops * sizeof(*tsops))) {
+               ret =  -EFAULT;
+               goto out_free;
+       }
+
+       ret = __do_semtimedop(semid, sops, nsops, timeout, ns);
+
 out_free:
        if (sops != fast_sops)
                kvfree(sops);
-       return error;
+
+       return ret;
 }
 
 long ksys_semtimedop(int semid, struct sembuf __user *tsops,
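
The refactor above only moves the user copy and allocation out of the core
path; the user-space entry point is unchanged. A minimal caller, assuming an
existing semaphore set id:

    #define _GNU_SOURCE
    #include <sys/sem.h>
    #include <stdio.h>
    #include <time.h>

    static int take_one(int semid)
    {
            /* Decrement semaphore 0, undo on exit, give up after 1 second. */
            struct sembuf op = { .sem_num = 0, .sem_op = -1, .sem_flg = SEM_UNDO };
            struct timespec timeout = { .tv_sec = 1, .tv_nsec = 0 };

            if (semtimedop(semid, &op, 1, &timeout) < 0) {
                    perror("semtimedop");
                    return -1;
            }
            return 0;
    }
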
index 2dbf797..7896d30 100644 (file)
@@ -2603,6 +2603,15 @@ enum print_line_t trace_handle_return(struct trace_seq *s)
 }
 EXPORT_SYMBOL_GPL(trace_handle_return);
 
+static unsigned short migration_disable_value(void)
+{
+#if defined(CONFIG_SMP)
+       return current->migration_disabled;
+#else
+       return 0;
+#endif
+}
+
 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
 {
        unsigned int trace_flags = irqs_status;
@@ -2621,7 +2630,8 @@ unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
                trace_flags |= TRACE_FLAG_NEED_RESCHED;
        if (test_preempt_need_resched())
                trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
-       return (trace_flags << 16) | (pc & 0xff);
+       return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
+               (min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
 }
 
 struct ring_buffer_event *
@@ -4189,9 +4199,10 @@ static void print_lat_help_header(struct seq_file *m)
                    "#                  | / _----=> need-resched    \n"
                    "#                  || / _---=> hardirq/softirq \n"
                    "#                  ||| / _--=> preempt-depth   \n"
-                   "#                  |||| /     delay            \n"
-                   "#  cmd     pid     ||||| time  |   caller      \n"
-                   "#     \\   /        |||||  \\    |   /         \n");
+                   "#                  |||| / _-=> migrate-disable \n"
+                   "#                  ||||| /     delay           \n"
+                   "#  cmd     pid     |||||| time  |   caller     \n"
+                   "#     \\   /        ||||||  \\    |    /       \n");
 }
 
 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
@@ -4229,9 +4240,10 @@ static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file
        seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
        seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
        seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
-       seq_printf(m, "#                            %.*s||| /     delay\n", prec, space);
-       seq_printf(m, "#           TASK-PID  %.*s CPU#  ||||   TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
-       seq_printf(m, "#              | |    %.*s   |   ||||      |         |\n", prec, "       |    ");
+       seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
+       seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
+       seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
+       seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
 }
 
 void
index 1060b04..388e65d 100644 (file)
@@ -522,14 +522,14 @@ trace_boot_init_events(struct trace_array *tr, struct xbc_node *node)
        if (!node)
                return;
        /* per-event key starts with "event.GROUP.EVENT" */
-       xbc_node_for_each_child(node, gnode) {
+       xbc_node_for_each_subkey(node, gnode) {
                data = xbc_node_get_data(gnode);
                if (!strcmp(data, "enable")) {
                        enable_all = true;
                        continue;
                }
                enable = false;
-               xbc_node_for_each_child(gnode, enode) {
+               xbc_node_for_each_subkey(gnode, enode) {
                        data = xbc_node_get_data(enode);
                        if (!strcmp(data, "enable")) {
                                enable = true;
@@ -625,7 +625,7 @@ trace_boot_init_instances(struct xbc_node *node)
        if (!node)
                return;
 
-       xbc_node_for_each_child(node, inode) {
+       xbc_node_for_each_subkey(node, inode) {
                p = xbc_node_get_data(inode);
                if (!p || *p == '\0')
                        continue;
index 56a96e9..3044b76 100644 (file)
@@ -151,7 +151,7 @@ static struct trace_eprobe *alloc_event_probe(const char *group,
 
        ep = kzalloc(struct_size(ep, tp.args, nargs), GFP_KERNEL);
        if (!ep) {
-               trace_event_put_ref(ep->event);
+               trace_event_put_ref(event);
                goto error;
        }
        ep->event = event;
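The one-line change above fixes a NULL dereference: when kzalloc() fails, ep is NULL, so ep->event cannot be read, and the reference already taken on the event has to be dropped through the local pointer instead. A generic kernel-style sketch of the pattern (the types and helpers here are hypothetical):

	#include <linux/slab.h>

	struct thing;				/* opaque, hypothetical */
	void thing_put(struct thing *t);	/* hypothetical ref-drop helper */

	struct widget {
		struct thing *thing;
	};

	static struct widget *widget_create(struct thing *t)
	{
		struct widget *w = kzalloc(sizeof(*w), GFP_KERNEL);

		if (!w) {
			/* w is NULL here; w->thing would oops, so drop via 't'. */
			thing_put(t);
			return NULL;
		}
		w->thing = t;
		return w;
	}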
@@ -851,7 +851,8 @@ static int __trace_eprobe_create(int argc, const char *argv[])
                ret = PTR_ERR(ep);
                /* This must return -ENOMEM, else there is a bug */
                WARN_ON_ONCE(ret != -ENOMEM);
-               goto error;     /* We know ep is not allocated */
+               ep = NULL;
+               goto error;
        }
 
        argc -= 2; argv += 2;
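Here alloc_event_probe() returns an ERR_PTR on failure, and the shared error label hands ep to the cleanup path; resetting ep to NULL first keeps that path from treating the encoded error value as a real pointer. A hedged sketch of the idiom (obj_alloc()/obj_destroy() are hypothetical helpers):

	#include <linux/err.h>

	struct obj;				/* opaque, hypothetical */
	struct obj *obj_alloc(void);		/* may return ERR_PTR(-ENOMEM) */
	void obj_destroy(struct obj *o);	/* assumed to tolerate NULL */

	static int create_object(void)
	{
		struct obj *o;
		int ret = 0;

		o = obj_alloc();
		if (IS_ERR(o)) {
			ret = PTR_ERR(o);
			o = NULL;	/* shared cleanup must not see the ERR_PTR */
			goto error;
		}

		/* ... later setup steps may also set ret and 'goto error' ... */
		return 0;
	error:
		obj_destroy(o);
		return ret;
	}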
index 1349b6d..830b3b9 100644 (file)
@@ -181,6 +181,7 @@ static int trace_define_common_fields(void)
 
        __common_field(unsigned short, type);
        __common_field(unsigned char, flags);
+       /* Holds both preempt_count and migrate_disable */
        __common_field(unsigned char, preempt_count);
        __common_field(int, pid);
 
index 9d91b1c..a6061a6 100644 (file)
@@ -508,7 +508,8 @@ struct track_data {
 struct hist_elt_data {
        char *comm;
        u64 *var_ref_vals;
-       char *field_var_str[SYNTH_FIELDS_MAX];
+       char **field_var_str;
+       int n_field_var_str;
 };
 
 struct snapshot_context {
@@ -1401,9 +1402,11 @@ static void hist_elt_data_free(struct hist_elt_data *elt_data)
 {
        unsigned int i;
 
-       for (i = 0; i < SYNTH_FIELDS_MAX; i++)
+       for (i = 0; i < elt_data->n_field_var_str; i++)
                kfree(elt_data->field_var_str[i]);
 
+       kfree(elt_data->field_var_str);
+
        kfree(elt_data->comm);
        kfree(elt_data);
 }
@@ -1451,6 +1454,13 @@ static int hist_trigger_elt_data_alloc(struct tracing_map_elt *elt)
 
        size = STR_VAR_LEN_MAX;
 
+       elt_data->field_var_str = kcalloc(n_str, sizeof(char *), GFP_KERNEL);
+       if (!elt_data->field_var_str) {
+               hist_elt_data_free(elt_data);
+               return -EINVAL;
+       }
+       elt_data->n_field_var_str = n_str;
+
        for (i = 0; i < n_str; i++) {
                elt_data->field_var_str[i] = kzalloc(size, GFP_KERNEL);
                if (!elt_data->field_var_str[i]) {
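The histogram element data now carries a dynamically sized array of string buffers plus its length, so the free path walks exactly what was allocated and also frees the array itself. A condensed kernel-style sketch of that alloc/free pairing (names invented for illustration):

	#include <linux/slab.h>

	struct elt {
		char **strs;
		int n_strs;
	};

	static void elt_free(struct elt *e)
	{
		int i;

		for (i = 0; i < e->n_strs; i++)	/* n_strs stays 0 until the array exists */
			kfree(e->strs[i]);
		kfree(e->strs);
		kfree(e);
	}

	static struct elt *elt_alloc(int n, size_t len)
	{
		struct elt *e = kzalloc(sizeof(*e), GFP_KERNEL);
		int i;

		if (!e)
			return NULL;
		e->strs = kcalloc(n, sizeof(char *), GFP_KERNEL);
		if (!e->strs) {
			elt_free(e);
			return NULL;
		}
		e->n_strs = n;
		for (i = 0; i < n; i++) {
			e->strs[i] = kzalloc(len, GFP_KERNEL);
			if (!e->strs[i]) {
				elt_free(e);
				return NULL;
			}
		}
		return e;
	}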
index 65b08b8..ce05361 100644 (file)
@@ -1548,7 +1548,7 @@ static int start_kthread(unsigned int cpu)
 static int start_per_cpu_kthreads(struct trace_array *tr)
 {
        struct cpumask *current_mask = &save_cpumask;
-       int retval;
+       int retval = 0;
        int cpu;
 
        cpus_read_lock();
@@ -1568,13 +1568,13 @@ static int start_per_cpu_kthreads(struct trace_array *tr)
                retval = start_kthread(cpu);
                if (retval) {
                        stop_per_cpu_kthreads();
-                       return retval;
+                       break;
                }
        }
 
        cpus_read_unlock();
 
-       return 0;
+       return retval;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
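The start_per_cpu_kthreads() change converts an early return into a break so that cpus_read_unlock() also runs on the failure path; previously an error from start_kthread() left the CPU hotplug lock held. A small sketch of the single-exit idiom, with do_work() standing in as a hypothetical per-CPU step:

	#include <linux/cpu.h>
	#include <linux/cpumask.h>

	static int do_work(unsigned int cpu);	/* hypothetical, defined elsewhere */

	static int start_all(void)
	{
		unsigned int cpu;
		int retval = 0;

		cpus_read_lock();
		for_each_online_cpu(cpu) {
			retval = do_work(cpu);
			if (retval)
				break;		/* still reaches the unlock below */
		}
		cpus_read_unlock();

		return retval;
	}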
index a0bf446..c2ca40e 100644 (file)
@@ -492,8 +492,13 @@ int trace_print_lat_fmt(struct trace_seq *s, struct trace_entry *entry)
        trace_seq_printf(s, "%c%c%c",
                         irqs_off, need_resched, hardsoft_irq);
 
-       if (entry->preempt_count)
-               trace_seq_printf(s, "%x", entry->preempt_count);
+       if (entry->preempt_count & 0xf)
+               trace_seq_printf(s, "%x", entry->preempt_count & 0xf);
+       else
+               trace_seq_putc(s, '.');
+
+       if (entry->preempt_count & 0xf0)
+               trace_seq_printf(s, "%x", entry->preempt_count >> 4);
        else
                trace_seq_putc(s, '.');
 
@@ -656,7 +661,7 @@ int trace_print_lat_context(struct trace_iterator *iter)
                trace_seq_printf(
                        s, "%16s %7d %3d %d %08x %08lx ",
                        comm, entry->pid, iter->cpu, entry->flags,
-                       entry->preempt_count, iter->idx);
+                       entry->preempt_count & 0xf, iter->idx);
        } else {
                lat_print_generic(s, entry, iter->cpu);
        }
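With both counters sharing one byte, the latency format prints two columns from it: the preempt depth from the low nibble and the migrate-disable depth from the high nibble, with '.' standing for zero. A user-space sketch of the same decode:

	#include <stdio.h>

	/* Print the two columns the way the latency format does. */
	static void show_counts(unsigned char pc)
	{
		if (pc & 0xf)
			printf("%x", pc & 0xf);		/* preempt depth */
		else
			putchar('.');

		if (pc & 0xf0)
			printf("%x", pc >> 4);		/* migrate-disable depth */
		else
			putchar('.');
		putchar('\n');
	}

	int main(void)
	{
		show_counts(0x23);	/* prints "32": preempt 3, migrate-disable 2 */
		return 0;
	}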
index 4007fe9..b29595f 100644 (file)
@@ -5,7 +5,7 @@
 #include "trace_dynevent.h"
 
 #define SYNTH_SYSTEM           "synthetic"
-#define SYNTH_FIELDS_MAX       32
+#define SYNTH_FIELDS_MAX       64
 
 #define STR_VAR_LEN_MAX                MAX_FILTER_STR_VAL /* must be multiple of sizeof(u64) */
 
index b76b92d..9bdfde0 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/types.h>
 #include <linux/slab.h>
 #include <linux/logic_iomem.h>
+#include <asm/io.h>
 
 struct logic_iomem_region {
        const struct resource *res;
@@ -78,7 +79,7 @@ static void __iomem *real_ioremap(phys_addr_t offset, size_t size)
 static void real_iounmap(void __iomem *addr)
 {
        WARN(1, "invalid iounmap for addr 0x%llx\n",
-            (unsigned long long)addr);
+            (unsigned long long __force)addr);
 }
 #endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
 
@@ -172,14 +173,15 @@ EXPORT_SYMBOL(iounmap);
 static u##sz real_raw_read ## op(const volatile void __iomem *addr)    \
 {                                                                      \
        WARN(1, "Invalid read" #op " at address %llx\n",                \
-            (unsigned long long)addr);                                 \
+            (unsigned long long __force)addr);                         \
        return (u ## sz)~0ULL;                                          \
 }                                                                      \
                                                                        \
-void real_raw_write ## op(u ## sz val, volatile void __iomem *addr)    \
+static void real_raw_write ## op(u ## sz val,                          \
+                                volatile void __iomem *addr)           \
 {                                                                      \
        WARN(1, "Invalid writeq" #op " of 0x%llx at address %llx\n",    \
-            (unsigned long long)val, (unsigned long long)addr);        \
+            (unsigned long long)val, (unsigned long long __force)addr);\
 }                                                                      \
 
 MAKE_FALLBACK(b, 8);
@@ -192,14 +194,14 @@ MAKE_FALLBACK(q, 64);
 static void real_memset_io(volatile void __iomem *addr, int value, size_t size)
 {
        WARN(1, "Invalid memset_io at address 0x%llx\n",
-            (unsigned long long)addr);
+            (unsigned long long __force)addr);
 }
 
 static void real_memcpy_fromio(void *buffer, const volatile void __iomem *addr,
                               size_t size)
 {
        WARN(1, "Invalid memcpy_fromio at address 0x%llx\n",
-            (unsigned long long)addr);
+            (unsigned long long __force)addr);
 
        memset(buffer, 0xff, size);
 }
@@ -208,7 +210,7 @@ static void real_memcpy_toio(volatile void __iomem *addr, const void *buffer,
                             size_t size)
 {
        WARN(1, "Invalid memcpy_toio at address 0x%llx\n",
-            (unsigned long long)addr);
+            (unsigned long long __force)addr);
 }
 #endif /* CONFIG_LOGIC_IOMEM_FALLBACK */
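The repeated change in this file adds __force to the casts from __iomem pointers to plain integers: sparse flags an address-space cast without it, and these WARN() messages only need the numeric value. A minimal kernel-style sketch of the same annotation:

	#include <linux/compiler.h>
	#include <linux/printk.h>

	static void report_bad_access(const volatile void __iomem *addr)
	{
		/* __force: the address-space cast is intentional; only the value is logged. */
		pr_warn("invalid access at 0x%llx\n",
			(unsigned long long __force)addr);
	}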
 
index 3bd7040..d3f1a1f 100644 (file)
@@ -24,13 +24,21 @@ bool __weak copy_from_kernel_nofault_allowed(const void *unsafe_src,
 
 long copy_from_kernel_nofault(void *dst, const void *src, size_t size)
 {
+       unsigned long align = 0;
+
+       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+               align = (unsigned long)dst | (unsigned long)src;
+
        if (!copy_from_kernel_nofault_allowed(src, size))
                return -ERANGE;
 
        pagefault_disable();
-       copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
-       copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
-       copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
+       if (!(align & 7))
+               copy_from_kernel_nofault_loop(dst, src, size, u64, Efault);
+       if (!(align & 3))
+               copy_from_kernel_nofault_loop(dst, src, size, u32, Efault);
+       if (!(align & 1))
+               copy_from_kernel_nofault_loop(dst, src, size, u16, Efault);
        copy_from_kernel_nofault_loop(dst, src, size, u8, Efault);
        pagefault_enable();
        return 0;
@@ -50,10 +58,18 @@ EXPORT_SYMBOL_GPL(copy_from_kernel_nofault);
 
 long copy_to_kernel_nofault(void *dst, const void *src, size_t size)
 {
+       unsigned long align = 0;
+
+       if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS))
+               align = (unsigned long)dst | (unsigned long)src;
+
        pagefault_disable();
-       copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
-       copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
-       copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
+       if (!(align & 7))
+               copy_to_kernel_nofault_loop(dst, src, size, u64, Efault);
+       if (!(align & 3))
+               copy_to_kernel_nofault_loop(dst, src, size, u32, Efault);
+       if (!(align & 1))
+               copy_to_kernel_nofault_loop(dst, src, size, u16, Efault);
        copy_to_kernel_nofault_loop(dst, src, size, u8, Efault);
        pagefault_enable();
        return 0;
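OR-ing the two pointers yields a mask whose low bits reflect the least-aligned of the pair, so on architectures without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the u64/u32/u16 copy loops are only entered when both source and destination allow that width. A small, runnable sketch of the same test (helper name is illustrative):

	#include <stddef.h>
	#include <stdint.h>
	#include <stdio.h>

	/* Largest naturally aligned access width usable for this pointer pair. */
	static size_t max_chunk(const void *dst, const void *src)
	{
		uintptr_t align = (uintptr_t)dst | (uintptr_t)src;

		if (!(align & 7))
			return 8;
		if (!(align & 3))
			return 4;
		if (!(align & 1))
			return 2;
		return 1;
	}

	int main(void)
	{
		_Alignas(8) char buf[16];	/* force 8-byte alignment for the demo */

		printf("%zu\n", max_chunk(buf, buf + 8));	/* 8: both 8-byte aligned */
		printf("%zu\n", max_chunk(buf, buf + 1));	/* 1: second pointer is odd */
		return 0;
	}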
index ae1e254..3b46490 100644 (file)
@@ -87,10 +87,24 @@ static void snd_gus_init_control(struct snd_gus_card *gus)
 
 static int snd_gus_free(struct snd_gus_card *gus)
 {
-       if (gus->gf1.res_port2) {
-               snd_gf1_stop(gus);
-               snd_gus_init_dma_irq(gus, 0);
+       if (gus->gf1.res_port2 == NULL)
+               goto __hw_end;
+       snd_gf1_stop(gus);
+       snd_gus_init_dma_irq(gus, 0);
+      __hw_end:
+       release_and_free_resource(gus->gf1.res_port1);
+       release_and_free_resource(gus->gf1.res_port2);
+       if (gus->gf1.irq >= 0)
+               free_irq(gus->gf1.irq, (void *) gus);
+       if (gus->gf1.dma1 >= 0) {
+               disable_dma(gus->gf1.dma1);
+               free_dma(gus->gf1.dma1);
        }
+       if (!gus->equal_dma && gus->gf1.dma2 >= 0) {
+               disable_dma(gus->gf1.dma2);
+               free_dma(gus->gf1.dma2);
+       }
+       kfree(gus);
        return 0;
 }
 
@@ -116,7 +130,7 @@ int snd_gus_create(struct snd_card *card,
        };
 
        *rgus = NULL;
-       gus = devm_kzalloc(card->dev, sizeof(*gus), GFP_KERNEL);
+       gus = kzalloc(sizeof(*gus), GFP_KERNEL);
        if (gus == NULL)
                return -ENOMEM;
        spin_lock_init(&gus->reg_lock);
@@ -142,33 +156,35 @@ int snd_gus_create(struct snd_card *card,
        gus->gf1.reg_timerctrl = GUSP(gus, TIMERCNTRL);
        gus->gf1.reg_timerdata = GUSP(gus, TIMERDATA);
        /* allocate resources */
-       gus->gf1.res_port1 = devm_request_region(card->dev, port, 16,
-                                                "GUS GF1 (Adlib/SB)");
+       gus->gf1.res_port1 = request_region(port, 16, "GUS GF1 (Adlib/SB)");
        if (!gus->gf1.res_port1) {
                snd_printk(KERN_ERR "gus: can't grab SB port 0x%lx\n", port);
+               snd_gus_free(gus);
                return -EBUSY;
        }
-       gus->gf1.res_port2 = devm_request_region(card->dev, port + 0x100, 12,
-                                                "GUS GF1 (Synth)");
+       gus->gf1.res_port2 = request_region(port + 0x100, 12, "GUS GF1 (Synth)");
        if (!gus->gf1.res_port2) {
                snd_printk(KERN_ERR "gus: can't grab synth port 0x%lx\n", port + 0x100);
+               snd_gus_free(gus);
                return -EBUSY;
        }
-       if (irq >= 0 && devm_request_irq(card->dev, irq, snd_gus_interrupt, 0,
-                                        "GUS GF1", (void *) gus)) {
+       if (irq >= 0 && request_irq(irq, snd_gus_interrupt, 0, "GUS GF1", (void *) gus)) {
                snd_printk(KERN_ERR "gus: can't grab irq %d\n", irq);
+               snd_gus_free(gus);
                return -EBUSY;
        }
        gus->gf1.irq = irq;
        card->sync_irq = irq;
-       if (snd_devm_request_dma(card->dev, dma1, "GUS - 1")) {
+       if (request_dma(dma1, "GUS - 1")) {
                snd_printk(KERN_ERR "gus: can't grab DMA1 %d\n", dma1);
+               snd_gus_free(gus);
                return -EBUSY;
        }
        gus->gf1.dma1 = dma1;
        if (dma2 >= 0 && dma1 != dma2) {
-               if (snd_devm_request_dma(card->dev, dma2, "GUS - 2")) {
+               if (request_dma(dma2, "GUS - 2")) {
                        snd_printk(KERN_ERR "gus: can't grab DMA2 %d\n", dma2);
+                       snd_gus_free(gus);
                        return -EBUSY;
                }
                gus->gf1.dma2 = dma2;
@@ -193,8 +209,10 @@ int snd_gus_create(struct snd_card *card,
        gus->gf1.volume_ramp = 25;
        gus->gf1.smooth_pan = 1;
        err = snd_device_new(card, SNDRV_DEV_LOWLEVEL, gus, &ops);
-       if (err < 0)
+       if (err < 0) {
+               snd_gus_free(gus);
                return err;
+       }
        *rgus = gus;
        return 0;
 }
index 20f490e..a04a9d3 100644 (file)
@@ -618,12 +618,19 @@ static int snd_interwave_card_new(struct device *pdev, int dev,
        return 0;
 }
 
-static int snd_interwave_probe(struct snd_card *card, int dev)
+static int snd_interwave_probe_gus(struct snd_card *card, int dev,
+                                  struct snd_gus_card **gusp)
+{
+       return snd_gus_create(card, port[dev], -irq[dev], dma1[dev], dma2[dev],
+                             0, 32, pcm_channels[dev], effect[dev], gusp);
+}
+
+static int snd_interwave_probe(struct snd_card *card, int dev,
+                              struct snd_gus_card *gus)
 {
        int xirq, xdma1, xdma2;
        struct snd_interwave *iwcard = card->private_data;
        struct snd_wss *wss;
-       struct snd_gus_card *gus;
 #ifdef SNDRV_STB
        struct snd_i2c_bus *i2c_bus;
 #endif
@@ -634,14 +641,6 @@ static int snd_interwave_probe(struct snd_card *card, int dev)
        xdma1 = dma1[dev];
        xdma2 = dma2[dev];
 
-       err = snd_gus_create(card,
-                            port[dev],
-                            -xirq, xdma1, xdma2,
-                            0, 32,
-                            pcm_channels[dev], effect[dev], &gus);
-       if (err < 0)
-               return err;
-
        err = snd_interwave_detect(iwcard, gus, dev
 #ifdef SNDRV_STB
                                   , &i2c_bus
@@ -757,22 +756,6 @@ static int snd_interwave_probe(struct snd_card *card, int dev)
        return 0;
 }
 
-static int snd_interwave_isa_probe1(int dev, struct device *devptr)
-{
-       struct snd_card *card;
-       int err;
-
-       err = snd_interwave_card_new(devptr, dev, &card);
-       if (err < 0)
-               return err;
-
-       err = snd_interwave_probe(card, dev);
-       if (err < 0)
-               return err;
-       dev_set_drvdata(devptr, card);
-       return 0;
-}
-
 static int snd_interwave_isa_match(struct device *pdev,
                                   unsigned int dev)
 {
@@ -788,6 +771,8 @@ static int snd_interwave_isa_match(struct device *pdev,
 static int snd_interwave_isa_probe(struct device *pdev,
                                   unsigned int dev)
 {
+       struct snd_card *card;
+       struct snd_gus_card *gus;
        int err;
        static const int possible_irqs[] = {5, 11, 12, 9, 7, 15, 3, -1};
        static const int possible_dmas[] = {0, 1, 3, 5, 6, 7, -1};
@@ -814,19 +799,31 @@ static int snd_interwave_isa_probe(struct device *pdev,
                }
        }
 
+       err = snd_interwave_card_new(pdev, dev, &card);
+       if (err < 0)
+               return err;
+
        if (port[dev] != SNDRV_AUTO_PORT)
-               return snd_interwave_isa_probe1(dev, pdev);
+               err = snd_interwave_probe_gus(card, dev, &gus);
        else {
                static const long possible_ports[] = {0x210, 0x220, 0x230, 0x240, 0x250, 0x260};
                int i;
                for (i = 0; i < ARRAY_SIZE(possible_ports); i++) {
                        port[dev] = possible_ports[i];
-                       err = snd_interwave_isa_probe1(dev, pdev);
+                       err = snd_interwave_probe_gus(card, dev, &gus);
                        if (! err)
                                return 0;
                }
-               return err;
        }
+       if (err < 0)
+               return err;
+
+       err = snd_interwave_probe(card, dev, gus);
+       if (err < 0)
+               return err;
+
+       dev_set_drvdata(pdev, card);
+       return 0;
 }
 
 static struct isa_driver snd_interwave_driver = {
@@ -844,6 +841,7 @@ static int snd_interwave_pnp_detect(struct pnp_card_link *pcard,
 {
        static int dev;
        struct snd_card *card;
+       struct snd_gus_card *gus;
        int res;
 
        for ( ; dev < SNDRV_CARDS; dev++) {
@@ -860,7 +858,10 @@ static int snd_interwave_pnp_detect(struct pnp_card_link *pcard,
        res = snd_interwave_pnp(dev, card->private_data, pcard, pid);
        if (res < 0)
                return res;
-       res = snd_interwave_probe(card, dev);
+       res = snd_interwave_probe_gus(card, dev, &gus);
+       if (res < 0)
+               return res;
+       res = snd_interwave_probe(card, dev, gus);
        if (res < 0)
                return res;
        pnp_set_card_drvdata(pcard, card);
index f48cc20..468a6a2 100644 (file)
@@ -137,6 +137,7 @@ static int snd_vx222_create(struct snd_card *card, struct pci_dev *pci,
        }
        chip->irq = pci->irq;
        card->sync_irq = chip->irq;
+       *rchip = vx;
 
        return 0;
 }
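The one-line addition restores the missing assignment of the output parameter: without *rchip being set, the caller never received the newly created chip even though the function reported success. A sketch of the constructor-with-out-parameter convention (the type here is hypothetical):

	#include <linux/errno.h>
	#include <linux/slab.h>

	struct thing {
		int dummy;	/* placeholder member for the sketch */
	};

	static int thing_create(struct thing **out)
	{
		struct thing *t = kzalloc(sizeof(*t), GFP_KERNEL);

		if (!t)
			return -ENOMEM;
		/* ... set up t ... */
		*out = t;	/* the step the hunk above reinstates */
		return 0;
	}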
index e822fa1..4a64cab 100644 (file)
@@ -2942,9 +2942,6 @@ static int rt5682_suspend(struct snd_soc_component *component)
                        break;
                }
 
-               snd_soc_component_update_bits(component, RT5682_PWR_ANLG_3,
-                       RT5682_PWR_CBJ, 0);
-
                /* enter SAR ADC power saving mode */
                snd_soc_component_update_bits(component, RT5682_SAR_IL_CMD_1,
                        RT5682_SAR_BUTT_DET_MASK | RT5682_SAR_BUTDET_MODE_MASK |
index 5e71382..546f6fd 100644 (file)
@@ -285,6 +285,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
        if (li->cpu) {
                struct snd_soc_card *card = simple_priv_to_card(priv);
                struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
+               struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
                int is_single_links = 0;
 
                /* Codec is dummy */
@@ -313,6 +314,7 @@ static int graph_dai_link_of_dpcm(struct asoc_simple_priv *priv,
                        dai_link->no_pcm = 1;
 
                asoc_simple_canonicalize_cpu(cpus, is_single_links);
+               asoc_simple_canonicalize_platform(platforms, cpus);
        } else {
                struct snd_soc_codec_conf *cconf = simple_props_to_codec_conf(dai_props, 0);
                struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
@@ -366,6 +368,7 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv,
        struct snd_soc_dai_link *dai_link = simple_priv_to_link(priv, li->link);
        struct snd_soc_dai_link_component *cpus = asoc_link_to_cpu(dai_link, 0);
        struct snd_soc_dai_link_component *codecs = asoc_link_to_codec(dai_link, 0);
+       struct snd_soc_dai_link_component *platforms = asoc_link_to_platform(dai_link, 0);
        char dai_name[64];
        int ret, is_single_links = 0;
 
@@ -383,6 +386,7 @@ static int graph_dai_link_of(struct asoc_simple_priv *priv,
                 "%s-%s", cpus->dai_name, codecs->dai_name);
 
        asoc_simple_canonicalize_cpu(cpus, is_single_links);
+       asoc_simple_canonicalize_platform(platforms, cpus);
 
        ret = graph_link_init(priv, cpu_ep, codec_ep, li, dai_name);
        if (ret < 0)
@@ -608,6 +612,7 @@ static int graph_count_noml(struct asoc_simple_priv *priv,
 
        li->num[li->link].cpus          = 1;
        li->num[li->link].codecs        = 1;
+       li->num[li->link].platforms     = 1;
 
        li->link += 1; /* 1xCPU-Codec */
 
@@ -630,6 +635,7 @@ static int graph_count_dpcm(struct asoc_simple_priv *priv,
 
        if (li->cpu) {
                li->num[li->link].cpus          = 1;
+               li->num[li->link].platforms     = 1;
 
                li->link++; /* 1xCPU-dummy */
        } else {
index 046955b..61b71d6 100644 (file)
@@ -602,7 +602,7 @@ config SND_SOC_INTEL_SOUNDWIRE_SOF_MACH
        select SND_SOC_DMIC
        select SND_SOC_INTEL_HDA_DSP_COMMON
        select SND_SOC_INTEL_SOF_MAXIM_COMMON
-       select SND_SOC_SDW_MOCKUP
+       imply SND_SOC_SDW_MOCKUP
        help
          Add support for Intel SoundWire-based platforms connected to
          MAX98373, RT700, RT711, RT1308 and RT715
index cf567a8..5a2f466 100644 (file)
@@ -187,6 +187,7 @@ config SND_SOC_MT8192_MT6359_RT1015_RT5682
 
 config SND_SOC_MT8195
        tristate "ASoC support for Mediatek MT8195 chip"
+       depends on ARCH_MEDIATEK || COMPILE_TEST
        select SND_SOC_MEDIATEK
        help
          This adds ASoC platform driver support for Mediatek MT8195 chip
@@ -197,7 +198,7 @@ config SND_SOC_MT8195
 config SND_SOC_MT8195_MT6359_RT1019_RT5682
        tristate "ASoC Audio driver for MT8195 with MT6359 RT1019 RT5682 codec"
        depends on I2C
-       depends on SND_SOC_MT8195
+       depends on SND_SOC_MT8195 && MTK_PMIC_WRAP
        select SND_SOC_MT6359
        select SND_SOC_RT1015P
        select SND_SOC_RT5682_I2C
index 5dc217f..c97ace7 100644 (file)
@@ -1018,13 +1018,12 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
                                of_parse_phandle(pdev->dev.of_node,
                                                 "mediatek,dptx-codec", 0);
                        if (!dai_link->codecs->of_node) {
-                               dev_err(&pdev->dev, "Property 'dptx-codec' missing or invalid\n");
-                               return -EINVAL;
+                               dev_dbg(&pdev->dev, "No property 'dptx-codec'\n");
+                       } else {
+                               dai_link->codecs->name = NULL;
+                               dai_link->codecs->dai_name = "i2s-hifi";
+                               dai_link->init = mt8195_dptx_codec_init;
                        }
-
-                       dai_link->codecs->name = NULL;
-                       dai_link->codecs->dai_name = "i2s-hifi";
-                       dai_link->init = mt8195_dptx_codec_init;
                }
 
                if (strcmp(dai_link->name, "ETDM3_OUT_BE") == 0) {
@@ -1032,13 +1031,12 @@ static int mt8195_mt6359_rt1019_rt5682_dev_probe(struct platform_device *pdev)
                                of_parse_phandle(pdev->dev.of_node,
                                                 "mediatek,hdmi-codec", 0);
                        if (!dai_link->codecs->of_node) {
-                               dev_err(&pdev->dev, "Property 'hdmi-codec' missing or invalid\n");
-                               return -EINVAL;
+                               dev_dbg(&pdev->dev, "No property 'hdmi-codec'\n");
+                       } else {
+                               dai_link->codecs->name = NULL;
+                               dai_link->codecs->dai_name = "i2s-hifi";
+                               dai_link->init = mt8195_hdmi_codec_init;
                        }
-
-                       dai_link->codecs->name = NULL;
-                       dai_link->codecs->dai_name = "i2s-hifi";
-                       dai_link->init = mt8195_hdmi_codec_init;
                }
        }
 
index 53e0b4a..7e89f5b 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
+#include <linux/spinlock.h>
 #include <sound/pcm_params.h>
 #include <sound/dmaengine_pcm.h>
 
@@ -53,6 +54,7 @@ struct rk_i2s_dev {
        bool is_master_mode;
        const struct rk_i2s_pins *pins;
        unsigned int bclk_ratio;
+       spinlock_t lock; /* tx/rx lock */
 };
 
 static int i2s_runtime_suspend(struct device *dev)
@@ -96,6 +98,7 @@ static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
        unsigned int val = 0;
        int retry = 10;
 
+       spin_lock(&i2s->lock);
        if (on) {
                regmap_update_bits(i2s->regmap, I2S_DMACR,
                                   I2S_DMACR_TDE_ENABLE, I2S_DMACR_TDE_ENABLE);
@@ -136,6 +139,7 @@ static void rockchip_snd_txctrl(struct rk_i2s_dev *i2s, int on)
                        }
                }
        }
+       spin_unlock(&i2s->lock);
 }
 
 static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
@@ -143,6 +147,7 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
        unsigned int val = 0;
        int retry = 10;
 
+       spin_lock(&i2s->lock);
        if (on) {
                regmap_update_bits(i2s->regmap, I2S_DMACR,
                                   I2S_DMACR_RDE_ENABLE, I2S_DMACR_RDE_ENABLE);
@@ -183,6 +188,7 @@ static void rockchip_snd_rxctrl(struct rk_i2s_dev *i2s, int on)
                        }
                }
        }
+       spin_unlock(&i2s->lock);
 }
 
 static int rockchip_i2s_set_fmt(struct snd_soc_dai *cpu_dai,
@@ -684,6 +690,7 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
        if (!i2s)
                return -ENOMEM;
 
+       spin_lock_init(&i2s->lock);
        i2s->dev = &pdev->dev;
 
        i2s->grf = syscon_regmap_lookup_by_phandle(node, "rockchip,grf");
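The Rockchip I2S change serializes the TX and RX control paths with one spinlock, initialized at probe time: both helpers perform read-modify-write sequences on shared control registers, and the lock keeps those sequences from interleaving. A compact sketch of the same arrangement (the struct and register field are stand-ins for the real regmap):

	#include <linux/slab.h>
	#include <linux/spinlock.h>

	struct chan_ctl {
		spinlock_t lock;	/* protects the shared control register */
		unsigned int ctl_reg;	/* stand-in for the real regmap access */
	};

	static void chan_enable(struct chan_ctl *c, unsigned int bits)
	{
		spin_lock(&c->lock);
		c->ctl_reg |= bits;	/* read-modify-write must not interleave */
		spin_unlock(&c->lock);
	}

	static struct chan_ctl *chan_alloc(void)
	{
		struct chan_ctl *c = kzalloc(sizeof(*c), GFP_KERNEL);

		if (c)
			spin_lock_init(&c->lock);
		return c;
	}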
index 81a29d1..0cc6677 100644 (file)
@@ -327,7 +327,7 @@ int simtec_audio_core_probe(struct platform_device *pdev,
 
        snd_dev = platform_device_alloc("soc-audio", -1);
        if (!snd_dev) {
-               dev_err(&pdev->dev, "failed to alloc soc-audio devicec\n");
+               dev_err(&pdev->dev, "failed to alloc soc-audio device\n");
                ret = -ENOMEM;
                goto err_gpio;
        }
index 4479a59..6ee6d24 100644 (file)
@@ -1743,6 +1743,7 @@ static const struct registration_quirk registration_quirks[] = {
        REG_QUIRK_ENTRY(0x0951, 0x16ed, 2),     /* Kingston HyperX Cloud Alpha S */
        REG_QUIRK_ENTRY(0x0951, 0x16ea, 2),     /* Kingston HyperX Cloud Flight S */
        REG_QUIRK_ENTRY(0x0ecb, 0x1f46, 2),     /* JBL Quantum 600 */
+       REG_QUIRK_ENTRY(0x0ecb, 0x1f47, 2),     /* JBL Quantum 800 */
        REG_QUIRK_ENTRY(0x0ecb, 0x2039, 2),     /* JBL Quantum 400 */
        REG_QUIRK_ENTRY(0x0ecb, 0x203c, 2),     /* JBL Quantum 600 */
        REG_QUIRK_ENTRY(0x0ecb, 0x203e, 2),     /* JBL Quantum 800 */
index f45fa99..fd67496 100644 (file)
@@ -111,9 +111,11 @@ static void xbc_show_list(void)
        char key[XBC_KEYLEN_MAX];
        struct xbc_node *leaf;
        const char *val;
+       int ret;
 
        xbc_for_each_key_value(leaf, val) {
-               if (xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX) < 0) {
+               ret = xbc_node_compose_key(leaf, key, XBC_KEYLEN_MAX);
+               if (ret < 0) {
                        fprintf(stderr, "Failed to compose key %d\n", ret);
                        break;
                }
index fbaf07d..6183b36 100755 (executable)
@@ -239,8 +239,8 @@ instance_options() { # [instance-name]
                emit_kv $PREFIX.cpumask = $val
        fi
        val=`cat $INSTANCE/tracing_on`
-       if [ `echo $val | sed -e s/f//g`x != x ]; then
-               emit_kv $PREFIX.tracing_on = $val
+       if [ "$val" = "0" ]; then
+               emit_kv $PREFIX.tracing_on = 0
        fi
 
        val=`cat $INSTANCE/current_tracer`
index baed891..f68e2e9 100755 (executable)
@@ -26,7 +26,7 @@ trap cleanup EXIT TERM
 NO=1
 
 xpass() { # pass test command
-  echo "test case $NO ($3)... "
+  echo "test case $NO ($*)... "
   if ! ($@ && echo "\t\t[OK]"); then
      echo "\t\t[NG]"; NG=$((NG + 1))
   fi
@@ -34,7 +34,7 @@ xpass() { # pass test command
 }
 
 xfail() { # fail test command
-  echo "test case $NO ($3)... "
+  echo "test case $NO ($*)... "
   if ! (! $@ && echo "\t\t[OK]"); then
      echo "\t\t[NG]"; NG=$((NG + 1))
   fi
index 25a3da4..5f5b2ba 100644 (file)
@@ -22,7 +22,7 @@ ls
 echo 0 > events/eprobes/$EPROBE/enable
 
 content=`grep '^ *ls-' trace | grep 'file='`
-nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."' -e '(fault)' ` || true
 
 if [ -z "$content" ]; then
        exit_fail