Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Jun 2018 11:59:52 +0000 (19:59 +0800)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 24 Jun 2018 11:59:52 +0000 (19:59 +0800)
Pull x86 fixes from Thomas Gleixner:
 "A set of fixes for x86:

   - Make Xen PV guest deal with speculative store bypass correctly

   - Address more fallout from the 5-Level pagetable handling. Undo an
     __initdata annotation to avoid section mismatch and malfunction
     when post init code would touch the freed variable.

   - Handle exception fixup in math_error() before calling notify_die().
     The reverse call order incorrectly triggers notify_die() listeners
     for soemthing which is handled correctly at the site which issues
     the floating point instruction.

   - Fix an off by one in the LLC topology calculation on AMD

   - Handle non standard memory block sizes gracefully un UV platforms

   - Plug a memory leak in the microcode loader

   - Sanitize the purgatory build magic

   - Add the x86 specific device tree bindings directory to the x86
     MAINTAINER file patterns"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Fix 'no5lvl' handling
  Revert "x86/mm: Mark __pgtable_l5_enabled __initdata"
  x86/CPU/AMD: Fix LLC ID bit-shift calculation
  MAINTAINERS: Add file patterns for x86 device tree bindings
  x86/microcode/intel: Fix memleak in save_microcode_patch()
  x86/platform/UV: Add kernel parameter to set memory block size
  x86/platform/UV: Use new set memory block size function
  x86/platform/UV: Add adjustable set memory block size function
  x86/build: Remove unnecessary preparation for purgatory
  Revert "kexec/purgatory: Add clean-up for purgatory directory"
  x86/xen: Add call of speculative_store_bypass_ht_init() to PV paths
  x86: Call fixup_exception() before notify_die() in math_error()

297 files changed:
Documentation/admin-guide/pm/intel_pstate.rst
Documentation/driver-api/infrastructure.rst
Documentation/trace/histogram.txt
Documentation/virtual/kvm/api.txt
MAINTAINERS
arch/alpha/Kconfig
arch/alpha/lib/Makefile
arch/alpha/lib/dec_and_lock.c [deleted file]
arch/arm/xen/enlighten.c
arch/arm64/crypto/aes-glue.c
arch/arm64/include/asm/kvm_host.h
arch/arm64/include/asm/sysreg.h
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/smp.c
arch/arm64/kvm/fpsimd.c
arch/arm64/mm/dma-mapping.c
arch/arm64/mm/proc.S
arch/mips/Kconfig
arch/mips/ath79/mach-pb44.c
arch/mips/bcm47xx/setup.c
arch/mips/include/asm/io.h
arch/mips/include/asm/mipsregs.h
arch/mips/include/uapi/asm/unistd.h
arch/mips/kernel/entry.S
arch/mips/kernel/mcount.S
arch/mips/kernel/scall32-o32.S
arch/mips/kernel/scall64-64.S
arch/mips/kernel/scall64-n32.S
arch/mips/kernel/scall64-o32.S
arch/mips/kernel/signal.c
arch/powerpc/Makefile
arch/powerpc/include/asm/book3s/32/pgalloc.h
arch/powerpc/include/asm/book3s/64/pgtable-4k.h
arch/powerpc/include/asm/book3s/64/pgtable-64k.h
arch/powerpc/include/asm/book3s/64/pgtable.h
arch/powerpc/include/asm/nmi.h
arch/powerpc/include/asm/nohash/32/pgalloc.h
arch/powerpc/include/asm/nohash/64/pgalloc.h
arch/powerpc/kernel/dt_cpu_ftrs.c
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/smp.c
arch/powerpc/kernel/stacktrace.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/mm/pgtable-book3s64.c
arch/powerpc/mm/tlb-radix.c
arch/s390/include/asm/css_chars.h
arch/x86/include/asm/barrier.h
arch/x86/include/asm/vmx.h
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/mcheck/mce-severity.c
arch/x86/kernel/cpu/mcheck/mce.c
arch/x86/kernel/quirks.c
arch/x86/kvm/vmx.c
arch/x86/kvm/x86.h
arch/x86/xen/enlighten.c
arch/x86/xen/enlighten_pv.c
arch/x86/xen/enlighten_pvh.c
block/bio.c
block/blk-core.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-softirq.c
block/blk-timeout.c
block/sed-opal.c
crypto/morus640.c
crypto/sha3_generic.c
drivers/acpi/acpi_lpss.c
drivers/acpi/ec.c
drivers/base/Makefile
drivers/base/core.c
drivers/base/dma-coherent.c [deleted file]
drivers/base/dma-contiguous.c [deleted file]
drivers/base/dma-mapping.c [deleted file]
drivers/block/nbd.c
drivers/block/null_blk.c
drivers/bluetooth/hci_nokia.c
drivers/char/hw_random/core.c
drivers/clocksource/timer-stm32.c
drivers/cpufreq/intel_pstate.c
drivers/cpufreq/qcom-cpufreq-kryo.c
drivers/crypto/chelsio/chtls/chtls_io.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
drivers/gpu/drm/amd/powerplay/hwmgr/vega10_powertune.c
drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_plane.c
drivers/gpu/drm/bridge/sil-sii8620.c
drivers/gpu/drm/drm_drv.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/i915/i915_gem_context.c
drivers/gpu/drm/i915/i915_gem_execbuffer.c
drivers/gpu/drm/i915/i915_irq.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_crt.c
drivers/gpu/drm/i915/intel_display.c
drivers/gpu/drm/i915/intel_dp.c
drivers/gpu/drm/i915/intel_dp_mst.c
drivers/gpu/drm/i915/intel_dsi.c
drivers/gpu/drm/i915/intel_dvo.c
drivers/gpu/drm/i915/intel_hdmi.c
drivers/gpu/drm/i915/intel_lrc.c
drivers/gpu/drm/i915/intel_lvds.c
drivers/gpu/drm/i915/intel_sdvo.c
drivers/gpu/drm/i915/intel_tv.c
drivers/gpu/drm/nouveau/dispnv50/curs507a.c
drivers/gpu/drm/nouveau/dispnv50/wndw.c
drivers/gpu/drm/qxl/qxl_display.c
drivers/gpu/drm/sun4i/sun4i_tcon.c
drivers/hid/hid-google-hammer.c
drivers/hid/hid-ids.h
drivers/hid/hid-steam.c
drivers/hid/intel-ish-hid/ipc/pci-ish.c
drivers/hid/wacom_sys.c
drivers/hwmon/dell-smm-hwmon.c
drivers/hwmon/nct6775.c
drivers/infiniband/core/uverbs_main.c
drivers/infiniband/core/verbs.c
drivers/infiniband/hw/mlx4/mr.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/sw/rxe/rxe_req.c
drivers/irqchip/irq-gic-v2m.c
drivers/irqchip/irq-gic-v3-its.c
drivers/irqchip/irq-ls-scfg-msi.c
drivers/lightnvm/Kconfig
drivers/net/ethernet/cisco/enic/enic_clsf.c
drivers/net/ethernet/cisco/enic/enic_main.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/qlogic/qed/qed_dcbx.c
drivers/net/ethernet/qlogic/qed/qed_ll2.c
drivers/net/ethernet/qlogic/qed/qed_main.c
drivers/net/ethernet/stmicro/stmmac/Kconfig
drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/sun/sungem.c
drivers/net/ethernet/ti/davinci_emac.c
drivers/net/hamradio/bpqether.c
drivers/net/ipvlan/ipvlan_main.c
drivers/net/net_failover.c
drivers/net/usb/cdc_ncm.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/target/core.c
drivers/opp/core.c
drivers/pinctrl/actions/pinctrl-owl.c
drivers/pinctrl/devicetree.c
drivers/pinctrl/mediatek/pinctrl-mt7622.c
drivers/pinctrl/mediatek/pinctrl-mtk-common.c
drivers/pinctrl/pinctrl-single.c
drivers/ptp/ptp_chardev.c
drivers/ptp/ptp_qoriq.c
drivers/s390/block/dasd.c
drivers/s390/block/dasd_alias.c
drivers/s390/block/dasd_diag.c
drivers/s390/block/dasd_eckd.c
drivers/s390/block/dasd_eer.c
drivers/s390/block/dasd_fba.c
drivers/s390/block/dasd_int.h
drivers/s390/cio/Makefile
drivers/s390/cio/vfio_ccw_cp.c
drivers/s390/cio/vfio_ccw_drv.c
drivers/s390/cio/vfio_ccw_fsm.c
drivers/s390/cio/vfio_ccw_trace.h [new file with mode: 0644]
drivers/scsi/scsi_transport_fc.c
drivers/scsi/xen-scsifront.c
drivers/xen/Makefile
drivers/xen/events/events_base.c
drivers/xen/grant-table.c
drivers/xen/manage.c
drivers/xen/privcmd-buf.c [new file with mode: 0644]
drivers/xen/privcmd.c
drivers/xen/privcmd.h
drivers/xen/xen-scsiback.c
fs/ext2/ext2.h
fs/ext2/super.c
fs/jfs/xattr.c
fs/nfs/delegation.c
fs/nfs/flexfilelayout/flexfilelayout.c
fs/nfs/nfs4proc.c
fs/nfs/pnfs.h
fs/proc/base.c
fs/quota/dquot.c
fs/udf/balloc.c
fs/udf/directory.c
fs/udf/inode.c
fs/udf/namei.c
fs/udf/udfdecl.h
include/acpi/processor.h
include/asm-generic/qspinlock_types.h
include/linux/atmdev.h
include/linux/backing-dev-defs.h
include/linux/bpf.h
include/linux/dma-contiguous.h
include/linux/filter.h
include/linux/irq.h
include/linux/irqdesc.h
include/linux/nfs_xdr.h
include/linux/refcount.h
include/linux/spinlock.h
include/net/ip6_fib.h
include/rdma/ib_verbs.h
include/uapi/linux/nbd.h
include/xen/xen.h
init/Kconfig
kernel/Makefile
kernel/bpf/core.c
kernel/bpf/devmap.c
kernel/bpf/syscall.c
kernel/dma/Kconfig [new file with mode: 0644]
kernel/dma/Makefile [new file with mode: 0644]
kernel/dma/coherent.c [new file with mode: 0644]
kernel/dma/contiguous.c [new file with mode: 0644]
kernel/dma/debug.c [new file with mode: 0644]
kernel/dma/direct.c [new file with mode: 0644]
kernel/dma/mapping.c [new file with mode: 0644]
kernel/dma/noncoherent.c [new file with mode: 0644]
kernel/dma/swiotlb.c [new file with mode: 0644]
kernel/dma/virt.c [new file with mode: 0644]
kernel/irq/debugfs.c
kernel/locking/lockdep.c
kernel/locking/rwsem.c
kernel/softirq.c
kernel/time/hrtimer.c
kernel/time/posix-cpu-timers.c
kernel/time/time.c
kernel/trace/trace.c
kernel/trace/trace_events_filter.c
lib/Kconfig
lib/Makefile
lib/dec_and_lock.c
lib/dma-debug.c [deleted file]
lib/dma-direct.c [deleted file]
lib/dma-noncoherent.c [deleted file]
lib/dma-virt.c [deleted file]
lib/refcount.c
lib/swiotlb.c [deleted file]
mm/backing-dev.c
mm/memblock.c
net/atm/br2684.c
net/atm/clip.c
net/atm/common.c
net/atm/lec.c
net/atm/mpc.c
net/atm/pppoatm.c
net/atm/raw.c
net/bpfilter/.gitignore [new file with mode: 0644]
net/bpfilter/Makefile
net/core/dev.c
net/core/filter.c
net/ipv4/inet_hashtables.c
net/ipv4/ip_output.c
net/ipv6/inet6_hashtables.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_output.c
net/ncsi/ncsi-aen.c
net/ncsi/ncsi-manage.c
net/sched/act_ife.c
net/sched/sch_blackhole.c
net/sunrpc/xprt.c
net/xdp/xsk.c
scripts/Makefile.build
tools/bpf/bpftool/perf.c
tools/bpf/bpftool/prog.c
tools/power/x86/turbostat/turbostat.8
tools/power/x86/turbostat/turbostat.c
tools/testing/selftests/bpf/config
tools/testing/selftests/bpf/test_offload.py
tools/testing/selftests/bpf/test_tunnel.sh
tools/testing/selftests/pstore/pstore_post_reboot_tests
tools/testing/selftests/rseq/param_test.c
tools/testing/selftests/rseq/rseq-mips.h [new file with mode: 0644]
tools/testing/selftests/rseq/rseq.h
tools/testing/selftests/sparc64/Makefile
tools/testing/selftests/sparc64/drivers/Makefile
tools/testing/selftests/static_keys/test_static_keys.sh
tools/testing/selftests/sync/config [new file with mode: 0644]
tools/testing/selftests/sysctl/sysctl.sh
tools/testing/selftests/user/test_user_copy.sh
tools/testing/selftests/vm/compaction_test.c
tools/testing/selftests/vm/mlock2-tests.c
tools/testing/selftests/vm/run_vmtests
tools/testing/selftests/vm/userfaultfd.c
tools/testing/selftests/zram/zram.sh
tools/testing/selftests/zram/zram_lib.sh
virt/kvm/Kconfig
virt/kvm/arm/mmu.c
virt/kvm/arm/vgic/vgic-v3.c
virt/kvm/kvm_main.c

index ab2fe0e..8b91649 100644 (file)
@@ -410,7 +410,7 @@ argument is passed to the kernel in the command line.
        That only is supported in some configurations, though (for example, if
        the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
        the operation mode of the driver cannot be changed), and if it is not
-       supported in the current configuration, writes to this attribute with
+       supported in the current configuration, writes to this attribute will
        fail with an appropriate error.
 
 Interpretation of Policy Attributes
index bee1b9a..6172f3c 100644 (file)
@@ -49,10 +49,10 @@ Device Drivers Base
 Device Drivers DMA Management
 -----------------------------
 
-.. kernel-doc:: drivers/base/dma-coherent.c
+.. kernel-doc:: kernel/dma/coherent.c
    :export:
 
-.. kernel-doc:: drivers/base/dma-mapping.c
+.. kernel-doc:: kernel/dma/mapping.c
    :export:
 
 Device drivers PnP support
index e73bcf9..7ffea6a 100644 (file)
@@ -1729,35 +1729,35 @@ If a variable isn't a key variable or prefixed with 'vals=', the
 associated event field will be saved in a variable but won't be summed
 as a value:
 
-  # echo 'hist:keys=next_pid:ts1=common_timestamp ... >> event/trigger
+  # echo 'hist:keys=next_pid:ts1=common_timestamp ...' >> event/trigger
 
 Multiple variables can be assigned at the same time.  The below would
 result in both ts0 and b being created as variables, with both
 common_timestamp and field1 additionally being summed as values:
 
-  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ... >> \
+  # echo 'hist:keys=pid:vals=$ts0,$b:ts0=common_timestamp,b=field1 ...' >> \
        event/trigger
 
 Note that variable assignments can appear either preceding or
 following their use.  The command below behaves identically to the
 command above:
 
-  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ... >> \
+  # echo 'hist:keys=pid:ts0=common_timestamp,b=field1:vals=$ts0,$b ...' >> \
        event/trigger
 
 Any number of variables not bound to a 'vals=' prefix can also be
 assigned by simply separating them with colons.  Below is the same
 thing but without the values being summed in the histogram:
 
-  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ... >> event/trigger
+  # echo 'hist:keys=pid:ts0=common_timestamp:b=field1 ...' >> event/trigger
 
 Variables set as above can be referenced and used in expressions on
 another event.
 
 For example, here's how a latency can be calculated:
 
-  # echo 'hist:keys=pid,prio:ts0=common_timestamp ... >> event1/trigger
-  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ... >> event2/trigger
+  # echo 'hist:keys=pid,prio:ts0=common_timestamp ...' >> event1/trigger
+  # echo 'hist:keys=next_pid:wakeup_lat=common_timestamp-$ts0 ...' >> event2/trigger
 
 In the first line above, the event's timetamp is saved into the
 variable ts0.  In the next line, ts0 is subtracted from the second
@@ -1766,7 +1766,7 @@ yet another variable, 'wakeup_lat'.  The hist trigger below in turn
 makes use of the wakeup_lat variable to compute a combined latency
 using the same key and variable from yet another event:
 
-  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ... >> event3/trigger
+  # echo 'hist:key=pid:wakeupswitch_lat=$wakeup_lat+$switchtime_lat ...' >> event3/trigger
 
 2.2.2 Synthetic Events
 ----------------------
@@ -1807,10 +1807,11 @@ the command that defined it with a '!':
 At this point, there isn't yet an actual 'wakeup_latency' event
 instantiated in the event subsytem - for this to happen, a 'hist
 trigger action' needs to be instantiated and bound to actual fields
-and variables defined on other events (see Section 6.3.3 below).
+and variables defined on other events (see Section 2.2.3 below on
+how that is done using hist trigger 'onmatch' action). Once that is
+done, the 'wakeup_latency' synthetic event instance is created.
 
-Once that is done, an event instance is created, and a histogram can
-be defined using it:
+A histogram can now be defined for the new synthetic event:
 
   # echo 'hist:keys=pid,prio,lat.log2:sort=pid,lat' >> \
         /sys/kernel/debug/tracing/events/synthetic/wakeup_latency/trigger
@@ -1960,7 +1961,7 @@ hist trigger specification.
     back to that pid, the timestamp difference is calculated.  If the
     resulting latency, stored in wakeup_lat, exceeds the current
     maximum latency, the values specified in the save() fields are
-    recoreded:
+    recorded:
 
     # echo 'hist:keys=pid:ts0=common_timestamp.usecs \
             if comm=="cyclictest"' >> \
index 495b774..d10944e 100644 (file)
@@ -4610,7 +4610,7 @@ This capability indicates that kvm will implement the interfaces to handle
 reset, migration and nested KVM for branch prediction blocking. The stfle
 facility 82 should not be provided to the guest without this capability.
 
-8.14 KVM_CAP_HYPERV_TLBFLUSH
+8.18 KVM_CAP_HYPERV_TLBFLUSH
 
 Architectures: x86
 
index 5e33d27..6cfd167 100644 (file)
@@ -4360,12 +4360,7 @@ L:       iommu@lists.linux-foundation.org
 T:     git git://git.infradead.org/users/hch/dma-mapping.git
 W:     http://git.infradead.org/users/hch/dma-mapping.git
 S:     Supported
-F:     lib/dma-debug.c
-F:     lib/dma-direct.c
-F:     lib/dma-noncoherent.c
-F:     lib/dma-virt.c
-F:     drivers/base/dma-mapping.c
-F:     drivers/base/dma-coherent.c
+F:     kernel/dma/
 F:     include/asm-generic/dma-mapping.h
 F:     include/linux/dma-direct.h
 F:     include/linux/dma-mapping.h
@@ -9756,6 +9751,11 @@ L:       linux-scsi@vger.kernel.org
 S:     Maintained
 F:     drivers/scsi/NCR_D700.*
 
+NCSI LIBRARY:
+M:     Samuel Mendoza-Jonas <sam@mendozajonas.com>
+S:     Maintained
+F:     net/ncsi/
+
 NCT6775 HARDWARE MONITOR DRIVER
 M:     Guenter Roeck <linux@roeck-us.net>
 L:     linux-hwmon@vger.kernel.org
@@ -13648,7 +13648,7 @@ M:      Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
 L:     iommu@lists.linux-foundation.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/konrad/swiotlb.git
 S:     Supported
-F:     lib/swiotlb.c
+F:     kernel/dma/swiotlb.c
 F:     arch/*/kernel/pci-swiotlb.c
 F:     include/linux/swiotlb.h
 
@@ -15576,6 +15576,13 @@ F:     Documentation/devicetree/bindings/x86/
 F:     Documentation/x86/
 F:     arch/x86/
 
+X86 ENTRY CODE
+M:     Andy Lutomirski <luto@kernel.org>
+L:     linux-kernel@vger.kernel.org
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/asm
+S:     Maintained
+F:     arch/x86/entry/
+
 X86 MCE INFRASTRUCTURE
 M:     Tony Luck <tony.luck@intel.com>
 M:     Borislav Petkov <bp@alien8.de>
@@ -15598,7 +15605,7 @@ F:      drivers/platform/x86/
 F:     drivers/platform/olpc/
 
 X86 VDSO
-M:     Andy Lutomirski <luto@amacapital.net>
+M:     Andy Lutomirski <luto@kernel.org>
 L:     linux-kernel@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/vdso
 S:     Maintained
index 0c4805a..04a4a13 100644 (file)
@@ -555,11 +555,6 @@ config SMP
 
          If you don't know what to do here, say N.
 
-config HAVE_DEC_LOCK
-       bool
-       depends on SMP
-       default y
-
 config NR_CPUS
        int "Maximum number of CPUs (2-32)"
        range 2 32
index 04f9729..854d5e7 100644 (file)
@@ -35,8 +35,6 @@ lib-y =       __divqu.o __remqu.o __divlu.o __remlu.o \
        callback_srm.o srm_puts.o srm_printk.o \
        fls.o
 
-lib-$(CONFIG_SMP) += dec_and_lock.o
-
 # The division routines are built from single source, with different defines.
 AFLAGS___divqu.o = -DDIV
 AFLAGS___remqu.o =       -DREM
diff --git a/arch/alpha/lib/dec_and_lock.c b/arch/alpha/lib/dec_and_lock.c
deleted file mode 100644 (file)
index a117707..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * arch/alpha/lib/dec_and_lock.c
- *
- * ll/sc version of atomic_dec_and_lock()
- * 
- */
-
-#include <linux/spinlock.h>
-#include <linux/atomic.h>
-#include <linux/export.h>
-
-  asm (".text                                  \n\
-       .global _atomic_dec_and_lock            \n\
-       .ent _atomic_dec_and_lock               \n\
-       .align  4                               \n\
-_atomic_dec_and_lock:                          \n\
-       .prologue 0                             \n\
-1:     ldl_l   $1, 0($16)                      \n\
-       subl    $1, 1, $1                       \n\
-       beq     $1, 2f                          \n\
-       stl_c   $1, 0($16)                      \n\
-       beq     $1, 4f                          \n\
-       mb                                      \n\
-       clr     $0                              \n\
-       ret                                     \n\
-2:     br      $29, 3f                         \n\
-3:     ldgp    $29, 0($29)                     \n\
-       br      $atomic_dec_and_lock_1..ng      \n\
-       .subsection 2                           \n\
-4:     br      1b                              \n\
-       .previous                               \n\
-       .end _atomic_dec_and_lock");
-
-static int __used atomic_dec_and_lock_1(atomic_t *atomic, spinlock_t *lock)
-{
-       /* Slow path */
-       spin_lock(lock);
-       if (atomic_dec_and_test(atomic))
-               return 1;
-       spin_unlock(lock);
-       return 0;
-}
-EXPORT_SYMBOL(_atomic_dec_and_lock);
index 8073625..07060e5 100644 (file)
@@ -59,6 +59,9 @@ struct xen_memory_region xen_extra_mem[XEN_EXTRA_MEM_MAX_REGIONS] __initdata;
 
 static __read_mostly unsigned int xen_events_irq;
 
+uint32_t xen_start_flags;
+EXPORT_SYMBOL(xen_start_flags);
+
 int xen_remap_domain_gfn_array(struct vm_area_struct *vma,
                               unsigned long addr,
                               xen_pfn_t *gfn, int nr,
@@ -293,9 +296,7 @@ void __init xen_early_init(void)
        xen_setup_features();
 
        if (xen_feature(XENFEAT_dom0))
-               xen_start_info->flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
-       else
-               xen_start_info->flags &= ~(SIF_INITDOMAIN|SIF_PRIVILEGED);
+               xen_start_flags |= SIF_INITDOMAIN|SIF_PRIVILEGED;
 
        if (!console_set_on_cmdline && !xen_initial_domain())
                add_preferred_console("hvc", 0, NULL);
index 253188f..e3e5095 100644 (file)
@@ -223,8 +223,8 @@ static int ctr_encrypt(struct skcipher_request *req)
                kernel_neon_begin();
                aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
                                (u8 *)ctx->key_enc, rounds, blocks, walk.iv);
-               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
                kernel_neon_end();
+               err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
        }
        if (walk.nbytes) {
                u8 __aligned(8) tail[AES_BLOCK_SIZE];
index fda9a8c..fe8777b 100644 (file)
@@ -306,6 +306,7 @@ struct kvm_vcpu_arch {
 #define KVM_ARM64_FP_ENABLED           (1 << 1) /* guest FP regs loaded */
 #define KVM_ARM64_FP_HOST              (1 << 2) /* host FP regs loaded */
 #define KVM_ARM64_HOST_SVE_IN_USE      (1 << 3) /* backup for host TIF_SVE */
+#define KVM_ARM64_HOST_SVE_ENABLED     (1 << 4) /* SVE enabled for EL0 */
 
 #define vcpu_gp_regs(v)                (&(v)->arch.ctxt.gp_regs)
 
index 6171178..a8f8481 100644 (file)
@@ -728,6 +728,17 @@ asm(
        asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \
 } while (0)
 
+/*
+ * Modify bits in a sysreg. Bits in the clear mask are zeroed, then bits in the
+ * set mask are set. Other bits are left as-is.
+ */
+#define sysreg_clear_set(sysreg, clear, set) do {                      \
+       u64 __scs_val = read_sysreg(sysreg);                            \
+       u64 __scs_new = (__scs_val & ~(u64)(clear)) | (set);            \
+       if (__scs_new != __scs_val)                                     \
+               write_sysreg(__scs_new, sysreg);                        \
+} while (0)
+
 static inline void config_sctlr_el1(u32 clear, u32 set)
 {
        u32 val;
index d2856b1..f24892a 100644 (file)
@@ -937,7 +937,7 @@ static int __init parse_kpti(char *str)
        __kpti_forced = enabled ? 1 : -1;
        return 0;
 }
-__setup("kpti=", parse_kpti);
+early_param("kpti", parse_kpti);
 #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */
 
 #ifdef CONFIG_ARM64_HW_AFDBM
index f3e2e3a..2faa986 100644 (file)
@@ -179,7 +179,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle)
  * This is the secondary CPU boot entry.  We're using this CPUs
  * idle thread stack, but a set of temporary page tables.
  */
-asmlinkage void secondary_start_kernel(void)
+asmlinkage notrace void secondary_start_kernel(void)
 {
        u64 mpidr = read_cpuid_mpidr() & MPIDR_HWID_BITMASK;
        struct mm_struct *mm = &init_mm;
index dc6ecfa..aac7808 100644 (file)
@@ -5,13 +5,14 @@
  * Copyright 2018 Arm Limited
  * Author: Dave Martin <Dave.Martin@arm.com>
  */
-#include <linux/bottom_half.h>
+#include <linux/irqflags.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_host.h>
 #include <asm/kvm_mmu.h>
+#include <asm/sysreg.h>
 
 /*
  * Called on entry to KVM_RUN unless this vcpu previously ran at least
@@ -61,10 +62,16 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu)
 {
        BUG_ON(!current->mm);
 
-       vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED | KVM_ARM64_HOST_SVE_IN_USE);
+       vcpu->arch.flags &= ~(KVM_ARM64_FP_ENABLED |
+                             KVM_ARM64_HOST_SVE_IN_USE |
+                             KVM_ARM64_HOST_SVE_ENABLED);
        vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+
        if (test_thread_flag(TIF_SVE))
                vcpu->arch.flags |= KVM_ARM64_HOST_SVE_IN_USE;
+
+       if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
+               vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
 }
 
 /*
@@ -92,19 +99,30 @@ void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu)
  */
 void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu)
 {
-       local_bh_disable();
+       unsigned long flags;
 
-       update_thread_flag(TIF_SVE,
-                          vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+       local_irq_save(flags);
 
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED) {
                /* Clean guest FP state to memory and invalidate cpu view */
                fpsimd_save();
                fpsimd_flush_cpu_state();
-       } else if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) {
-               /* Ensure user trap controls are correctly restored */
-               fpsimd_bind_task_to_cpu();
+       } else if (system_supports_sve()) {
+               /*
+                * The FPSIMD/SVE state in the CPU has not been touched, and we
+                * have SVE (and VHE): CPACR_EL1 (alias CPTR_EL2) has been
+                * reset to CPACR_EL1_DEFAULT by the Hyp code, disabling SVE
+                * for EL0.  To avoid spurious traps, restore the trap state
+                * seen by kvm_arch_vcpu_load_fp():
+                */
+               if (vcpu->arch.flags & KVM_ARM64_HOST_SVE_ENABLED)
+                       sysreg_clear_set(CPACR_EL1, 0, CPACR_EL1_ZEN_EL0EN);
+               else
+                       sysreg_clear_set(CPACR_EL1, CPACR_EL1_ZEN_EL0EN, 0);
        }
 
-       local_bh_enable();
+       update_thread_flag(TIF_SVE,
+                          vcpu->arch.flags & KVM_ARM64_HOST_SVE_IN_USE);
+
+       local_irq_restore(flags);
 }
index 49e217a..61e93f0 100644 (file)
@@ -583,13 +583,14 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size,
                                                    size >> PAGE_SHIFT);
                        return NULL;
                }
-               if (!coherent)
-                       __dma_flush_area(page_to_virt(page), iosize);
-
                addr = dma_common_contiguous_remap(page, size, VM_USERMAP,
                                                   prot,
                                                   __builtin_return_address(0));
-               if (!addr) {
+               if (addr) {
+                       memset(addr, 0, size);
+                       if (!coherent)
+                               __dma_flush_area(page_to_virt(page), iosize);
+               } else {
                        iommu_dma_unmap_page(dev, *handle, iosize, 0, attrs);
                        dma_release_from_contiguous(dev, page,
                                                    size >> PAGE_SHIFT);
index 5f9a73a..03646e6 100644 (file)
@@ -217,8 +217,9 @@ ENDPROC(idmap_cpu_replace_ttbr1)
 
        .macro __idmap_kpti_put_pgtable_ent_ng, type
        orr     \type, \type, #PTE_NG           // Same bit for blocks and pages
-       str     \type, [cur_\()\type\()p]       // Update the entry and ensure it
-       dc      civac, cur_\()\type\()p         // is visible to all CPUs.
+       str     \type, [cur_\()\type\()p]       // Update the entry and ensure
+       dmb     sy                              // that it is visible to all
+       dc      civac, cur_\()\type\()p         // CPUs.
        .endm
 
 /*
index 3f9deec..08c10c5 100644 (file)
@@ -65,6 +65,7 @@ config MIPS
        select HAVE_OPROFILE
        select HAVE_PERF_EVENTS
        select HAVE_REGS_AND_STACK_ACCESS_API
+       select HAVE_RSEQ
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
index 6b2c6f3..75fb96c 100644 (file)
@@ -34,7 +34,7 @@
 #define PB44_KEYS_DEBOUNCE_INTERVAL    (3 * PB44_KEYS_POLL_INTERVAL)
 
 static struct gpiod_lookup_table pb44_i2c_gpiod_table = {
-       .dev_id = "i2c-gpio",
+       .dev_id = "i2c-gpio.0",
        .table = {
                GPIO_LOOKUP_IDX("ath79-gpio", PB44_GPIO_I2C_SDA,
                                NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN),
index 6054d49..8c9cbf1 100644 (file)
@@ -212,6 +212,12 @@ static int __init bcm47xx_cpu_fixes(void)
                 */
                if (bcm47xx_bus.bcma.bus.chipinfo.id == BCMA_CHIP_ID_BCM4706)
                        cpu_wait = NULL;
+
+               /*
+                * BCM47XX Erratum "R10: PCIe Transactions Periodically Fail"
+                * Enable ExternalSync for sync instruction to take effect
+                */
+               set_c0_config7(MIPS_CONF7_ES);
                break;
 #endif
        }
index a7d0b83..cea8ad8 100644 (file)
@@ -414,6 +414,8 @@ static inline type pfx##in##bwlq##p(unsigned long port)                     \
        __val = *__addr;                                                \
        slow;                                                           \
                                                                        \
+       /* prevent prefetching of coherent DMA data prematurely */      \
+       rmb();                                                          \
        return pfx##ioswab##bwlq(__addr, __val);                        \
 }
 
index ae461d9..0bc2708 100644 (file)
 #define MIPS_CONF7_WII         (_ULCAST_(1) << 31)
 
 #define MIPS_CONF7_RPS         (_ULCAST_(1) << 2)
+/* ExternalSync */
+#define MIPS_CONF7_ES          (_ULCAST_(1) << 8)
 
 #define MIPS_CONF7_IAR         (_ULCAST_(1) << 10)
 #define MIPS_CONF7_AR          (_ULCAST_(1) << 16)
@@ -2765,6 +2767,7 @@ __BUILD_SET_C0(status)
 __BUILD_SET_C0(cause)
 __BUILD_SET_C0(config)
 __BUILD_SET_C0(config5)
+__BUILD_SET_C0(config7)
 __BUILD_SET_C0(intcontrol)
 __BUILD_SET_C0(intctl)
 __BUILD_SET_C0(srsmap)
index bb05e99..f25dd1d 100644 (file)
 #define __NR_pkey_alloc                        (__NR_Linux + 364)
 #define __NR_pkey_free                 (__NR_Linux + 365)
 #define __NR_statx                     (__NR_Linux + 366)
+#define __NR_rseq                      (__NR_Linux + 367)
+#define __NR_io_pgetevents             (__NR_Linux + 368)
 
 
 /*
  * Offset of the last Linux o32 flavoured syscall
  */
-#define __NR_Linux_syscalls            366
+#define __NR_Linux_syscalls            368
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */
 
 #define __NR_O32_Linux                 4000
-#define __NR_O32_Linux_syscalls                366
+#define __NR_O32_Linux_syscalls                368
 
 #if _MIPS_SIM == _MIPS_SIM_ABI64
 
 #define __NR_pkey_alloc                        (__NR_Linux + 324)
 #define __NR_pkey_free                 (__NR_Linux + 325)
 #define __NR_statx                     (__NR_Linux + 326)
+#define __NR_rseq                      (__NR_Linux + 327)
+#define __NR_io_pgetevents             (__NR_Linux + 328)
 
 /*
  * Offset of the last Linux 64-bit flavoured syscall
  */
-#define __NR_Linux_syscalls            326
+#define __NR_Linux_syscalls            328
 
 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */
 
 #define __NR_64_Linux                  5000
-#define __NR_64_Linux_syscalls         326
+#define __NR_64_Linux_syscalls         328
 
 #if _MIPS_SIM == _MIPS_SIM_NABI32
 
 #define __NR_pkey_alloc                        (__NR_Linux + 328)
 #define __NR_pkey_free                 (__NR_Linux + 329)
 #define __NR_statx                     (__NR_Linux + 330)
+#define __NR_rseq                      (__NR_Linux + 331)
+#define __NR_io_pgetevents             (__NR_Linux + 332)
 
 /*
  * Offset of the last N32 flavoured syscall
  */
-#define __NR_Linux_syscalls            330
+#define __NR_Linux_syscalls            332
 
 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */
 
 #define __NR_N32_Linux                 6000
-#define __NR_N32_Linux_syscalls                330
+#define __NR_N32_Linux_syscalls                332
 
 #endif /* _UAPI_ASM_UNISTD_H */
index 38a3029..d7de8ad 100644 (file)
@@ -79,6 +79,10 @@ FEXPORT(ret_from_fork)
        jal     schedule_tail           # a0 = struct task_struct *prev
 
 FEXPORT(syscall_exit)
+#ifdef CONFIG_DEBUG_RSEQ
+       move    a0, sp
+       jal     rseq_syscall
+#endif
        local_irq_disable               # make sure need_resched and
                                        # signals dont change between
                                        # sampling and return
@@ -141,6 +145,10 @@ work_notifysig:                            # deal with pending signals and
        j       resume_userspace_check
 
 FEXPORT(syscall_exit_partial)
+#ifdef CONFIG_DEBUG_RSEQ
+       move    a0, sp
+       jal     rseq_syscall
+#endif
        local_irq_disable               # make sure need_resched doesn't
                                        # change between and return
        LONG_L  a2, TI_FLAGS($28)       # current->work
index f2ee7e1..cff52b2 100644 (file)
@@ -119,10 +119,20 @@ NESTED(_mcount, PT_SIZE, ra)
 EXPORT_SYMBOL(_mcount)
        PTR_LA  t1, ftrace_stub
        PTR_L   t2, ftrace_trace_function /* Prepare t2 for (1) */
-       bne     t1, t2, static_trace
+       beq     t1, t2, fgraph_trace
         nop
 
+       MCOUNT_SAVE_REGS
+
+       move    a0, ra          /* arg1: self return address */
+       jalr    t2              /* (1) call *ftrace_trace_function */
+        move   a1, AT          /* arg2: parent's return address */
+
+       MCOUNT_RESTORE_REGS
+
+fgraph_trace:
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+       PTR_LA  t1, ftrace_stub
        PTR_L   t3, ftrace_graph_return
        bne     t1, t3, ftrace_graph_caller
         nop
@@ -131,24 +141,11 @@ EXPORT_SYMBOL(_mcount)
        bne     t1, t3, ftrace_graph_caller
         nop
 #endif
-       b       ftrace_stub
-#ifdef CONFIG_32BIT
-        addiu sp, sp, 8
-#else
-        nop
-#endif
 
-static_trace:
-       MCOUNT_SAVE_REGS
-
-       move    a0, ra          /* arg1: self return address */
-       jalr    t2              /* (1) call *ftrace_trace_function */
-        move   a1, AT          /* arg2: parent's return address */
-
-       MCOUNT_RESTORE_REGS
 #ifdef CONFIG_32BIT
        addiu sp, sp, 8
 #endif
+
        .globl ftrace_stub
 ftrace_stub:
        RETURN_BACK
index a9a7d78..91d3c8c 100644 (file)
@@ -590,3 +590,5 @@ EXPORT(sys_call_table)
        PTR     sys_pkey_alloc
        PTR     sys_pkey_free                   /* 4365 */
        PTR     sys_statx
+       PTR     sys_rseq
+       PTR     sys_io_pgetevents
index 65d5aee..358d959 100644 (file)
@@ -439,4 +439,6 @@ EXPORT(sys_call_table)
        PTR     sys_pkey_alloc
        PTR     sys_pkey_free                   /* 5325 */
        PTR     sys_statx
+       PTR     sys_rseq
+       PTR     sys_io_pgetevents
        .size   sys_call_table,.-sys_call_table
index cbf190e..c65eaac 100644 (file)
@@ -434,4 +434,6 @@ EXPORT(sysn32_call_table)
        PTR     sys_pkey_alloc
        PTR     sys_pkey_free
        PTR     sys_statx                       /* 6330 */
+       PTR     sys_rseq
+       PTR     compat_sys_io_pgetevents
        .size   sysn32_call_table,.-sysn32_call_table
index 9ebe3e2..73913f0 100644 (file)
@@ -583,4 +583,6 @@ EXPORT(sys32_call_table)
        PTR     sys_pkey_alloc
        PTR     sys_pkey_free                   /* 4365 */
        PTR     sys_statx
+       PTR     sys_rseq
+       PTR     compat_sys_io_pgetevents
        .size   sys32_call_table,.-sys32_call_table
index 9e22446..00f2535 100644 (file)
@@ -801,6 +801,8 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
                regs->regs[0] = 0;              /* Don't deal with this again.  */
        }
 
+       rseq_signal_deliver(regs);
+
        if (sig_uses_siginfo(&ksig->ka, abi))
                ret = abi->setup_rt_frame(vdso + abi->vdso->off_rt_sigreturn,
                                          ksig, regs, oldset);
@@ -868,6 +870,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
        if (thread_info_flags & _TIF_NOTIFY_RESUME) {
                clear_thread_flag(TIF_NOTIFY_RESUME);
                tracehook_notify_resume(regs);
+               rseq_handle_notify_resume(regs);
        }
 
        user_enter();
index bd06a3c..2ea575c 100644 (file)
@@ -244,6 +244,7 @@ cpu-as-$(CONFIG_4xx)                += -Wa,-m405
 cpu-as-$(CONFIG_ALTIVEC)       += $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)          += -Wa,-me200
 cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
+cpu-as-$(CONFIG_PPC_E500MC)    += $(call as-option,-Wa$(comma)-me500mc)
 
 KBUILD_AFLAGS += $(cpu-as-y)
 KBUILD_CFLAGS += $(cpu-as-y)
index 6a66739..e463380 100644 (file)
@@ -108,6 +108,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache()      do { } while (0)
+#define get_hugepd_cache_index(x)  (x)
 
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
index af5f2ba..a069dfc 100644 (file)
@@ -49,6 +49,27 @@ static inline int hugepd_ok(hugepd_t hpd)
 }
 #define is_hugepd(hpd)         (hugepd_ok(hpd))
 
+/*
+ * 16M and 16G huge page directory tables are allocated from slab cache
+ *
+ */
+#define H_16M_CACHE_INDEX (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE - 24)
+#define H_16G_CACHE_INDEX                                                      \
+       (PAGE_SHIFT + H_PTE_INDEX_SIZE + H_PMD_INDEX_SIZE + H_PUD_INDEX_SIZE - 34)
+
+static inline int get_hugepd_cache_index(int index)
+{
+       switch (index) {
+       case H_16M_CACHE_INDEX:
+               return HTLB_16M_INDEX;
+       case H_16G_CACHE_INDEX:
+               return HTLB_16G_INDEX;
+       default:
+               BUG();
+       }
+       /* should not reach */
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
index fb4b3ba..d7ee249 100644 (file)
@@ -45,8 +45,17 @@ static inline int hugepd_ok(hugepd_t hpd)
 {
        return 0;
 }
+
 #define is_hugepd(pdep)                        0
 
+/*
+ * This should never get called
+ */
+static inline int get_hugepd_cache_index(int index)
+{
+       BUG();
+}
+
 #else /* !CONFIG_HUGETLB_PAGE */
 static inline int pmd_huge(pmd_t pmd) { return 0; }
 static inline int pud_huge(pud_t pud) { return 0; }
index 63cee15..42aafba 100644 (file)
@@ -287,6 +287,11 @@ enum pgtable_index {
        PMD_INDEX,
        PUD_INDEX,
        PGD_INDEX,
+       /*
+        * Below are used with 4k page size and hugetlb
+        */
+       HTLB_16M_INDEX,
+       HTLB_16G_INDEX,
 };
 
 extern unsigned long __vmalloc_start;
index 0f571e0..bd9ba8d 100644 (file)
@@ -8,7 +8,7 @@ extern void arch_touch_nmi_watchdog(void);
 static inline void arch_touch_nmi_watchdog(void) {}
 #endif
 
-#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_STACKTRACE)
+#if defined(CONFIG_NMI_IPI) && defined(CONFIG_STACKTRACE)
 extern void arch_trigger_cpumask_backtrace(const cpumask_t *mask,
                                           bool exclude_self);
 #define arch_trigger_cpumask_backtrace arch_trigger_cpumask_backtrace
index 1707781..9de40eb 100644 (file)
@@ -109,6 +109,7 @@ static inline void pgtable_free(void *table, unsigned index_size)
 }
 
 #define check_pgt_cache()      do { } while (0)
+#define get_hugepd_cache_index(x)      (x)
 
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb,
index 0e693f3..e2d62d0 100644 (file)
@@ -141,6 +141,7 @@ static inline void pgtable_free(void *table, int shift)
        }
 }
 
+#define get_hugepd_cache_index(x)      (x)
 #ifdef CONFIG_SMP
 static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
 {
index 4be1c0d..96dd3d8 100644 (file)
@@ -711,7 +711,8 @@ static __init void cpufeatures_cpu_quirks(void)
                cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_HV_ASSIST;
                cur_cpu_spec->cpu_features |= CPU_FTR_P9_TM_XER_SO_BUG;
                cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
-       } else /* DD2.1 and up have DD2_1 */
+       } else if ((version & 0xffff0000) == 0x004e0000)
+               /* DD2.1 and up have DD2_1 */
                cur_cpu_spec->cpu_features |= CPU_FTR_POWER9_DD2_1;
 
        if ((version & 0xffff0000) == 0x004e0000) {
index 62b1a40..40b44bb 100644 (file)
@@ -700,12 +700,19 @@ EXPORT_SYMBOL(check_legacy_ioport);
 static int ppc_panic_event(struct notifier_block *this,
                              unsigned long event, void *ptr)
 {
+       /*
+        * panic does a local_irq_disable, but we really
+        * want interrupts to be hard disabled.
+        */
+       hard_irq_disable();
+
        /*
         * If firmware-assisted dump has been registered then trigger
         * firmware-assisted dump and let firmware handle everything else.
         */
        crash_fadump(NULL, ptr);
-       ppc_md.panic(ptr);  /* May not return */
+       if (ppc_md.panic)
+               ppc_md.panic(ptr);  /* May not return */
        return NOTIFY_DONE;
 }
 
@@ -716,7 +723,8 @@ static struct notifier_block ppc_panic_block = {
 
 void __init setup_panic(void)
 {
-       if (!ppc_md.panic)
+       /* PPC64 always does a hard irq disable in its panic handler */
+       if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
                return;
        atomic_notifier_chain_register(&panic_notifier_list, &ppc_panic_block);
 }
index 7a7ce8a..225bc5f 100644 (file)
@@ -387,6 +387,14 @@ void early_setup_secondary(void)
 
 #endif /* CONFIG_SMP */
 
+void panic_smp_self_stop(void)
+{
+       hard_irq_disable();
+       spin_begin();
+       while (1)
+               spin_cpu_relax();
+}
+
 #if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
 static bool use_spinloop(void)
 {
index 5eadfff..4794d6b 100644 (file)
@@ -600,9 +600,6 @@ static void nmi_stop_this_cpu(struct pt_regs *regs)
        nmi_ipi_busy_count--;
        nmi_ipi_unlock();
 
-       /* Remove this CPU */
-       set_cpu_online(smp_processor_id(), false);
-
        spin_begin();
        while (1)
                spin_cpu_relax();
@@ -617,9 +614,6 @@ void smp_send_stop(void)
 
 static void stop_this_cpu(void *dummy)
 {
-       /* Remove this CPU */
-       set_cpu_online(smp_processor_id(), false);
-
        hard_irq_disable();
        spin_begin();
        while (1)
index 07e97f2..e2c50b5 100644 (file)
@@ -196,7 +196,7 @@ save_stack_trace_tsk_reliable(struct task_struct *tsk,
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk_reliable);
 #endif /* CONFIG_HAVE_RELIABLE_STACKTRACE */
 
-#ifdef CONFIG_PPC_BOOK3S_64
+#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI)
 static void handle_backtrace_ipi(struct pt_regs *regs)
 {
        nmi_cpu_backtrace(regs);
@@ -242,4 +242,4 @@ void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self)
 {
        nmi_trigger_cpumask_backtrace(mask, exclude_self, raise_backtrace_ipi);
 }
-#endif /* CONFIG_PPC64 */
+#endif /* defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_NMI_IPI) */
index 7c5f479..8a9a49c 100644 (file)
@@ -337,7 +337,8 @@ static void free_hugepd_range(struct mmu_gather *tlb, hugepd_t *hpdp, int pdshif
        if (shift >= pdshift)
                hugepd_free(tlb, hugepte);
        else
-               pgtable_free_tlb(tlb, hugepte, pdshift - shift);
+               pgtable_free_tlb(tlb, hugepte,
+                                get_hugepd_cache_index(pdshift - shift));
 }
 
 static void hugetlb_free_pmd_range(struct mmu_gather *tlb, pud_t *pud,
index c1f4ca4..4afbfbb 100644 (file)
@@ -409,6 +409,18 @@ static inline void pgtable_free(void *table, int index)
        case PUD_INDEX:
                kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
                break;
+#if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
+               /* 16M hugepd directory at pud level */
+       case HTLB_16M_INDEX:
+               BUILD_BUG_ON(H_16M_CACHE_INDEX <= 0);
+               kmem_cache_free(PGT_CACHE(H_16M_CACHE_INDEX), table);
+               break;
+               /* 16G hugepd directory at the pgd level */
+       case HTLB_16G_INDEX:
+               BUILD_BUG_ON(H_16G_CACHE_INDEX <= 0);
+               kmem_cache_free(PGT_CACHE(H_16G_CACHE_INDEX), table);
+               break;
+#endif
                /* We don't free pgd table via RCU callback */
        default:
                BUG();
index 67a6e86..1135b43 100644 (file)
@@ -689,22 +689,17 @@ EXPORT_SYMBOL(radix__flush_tlb_kernel_range);
 static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
-void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
-                    unsigned long end)
+static inline void __radix__flush_tlb_range(struct mm_struct *mm,
+                                       unsigned long start, unsigned long end,
+                                       bool flush_all_sizes)
 
 {
-       struct mm_struct *mm = vma->vm_mm;
        unsigned long pid;
        unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift;
        unsigned long page_size = 1UL << page_shift;
        unsigned long nr_pages = (end - start) >> page_shift;
        bool local, full;
 
-#ifdef CONFIG_HUGETLB_PAGE
-       if (is_vm_hugetlb_page(vma))
-               return radix__flush_hugetlb_tlb_range(vma, start, end);
-#endif
-
        pid = mm->context.id;
        if (unlikely(pid == MMU_NO_CONTEXT))
                return;
@@ -738,37 +733,64 @@ is_local:
                                _tlbie_pid(pid, RIC_FLUSH_TLB);
                }
        } else {
-               bool hflush = false;
+               bool hflush = flush_all_sizes;
+               bool gflush = flush_all_sizes;
                unsigned long hstart, hend;
+               unsigned long gstart, gend;
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               hstart = (start + HPAGE_PMD_SIZE - 1) >> HPAGE_PMD_SHIFT;
-               hend = end >> HPAGE_PMD_SHIFT;
-               if (hstart < hend) {
-                       hstart <<= HPAGE_PMD_SHIFT;
-                       hend <<= HPAGE_PMD_SHIFT;
+               if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                        hflush = true;
+
+               if (hflush) {
+                       hstart = (start + PMD_SIZE - 1) & PMD_MASK;
+                       hend = end & PMD_MASK;
+                       if (hstart == hend)
+                               hflush = false;
+               }
+
+               if (gflush) {
+                       gstart = (start + PUD_SIZE - 1) & PUD_MASK;
+                       gend = end & PUD_MASK;
+                       if (gstart == gend)
+                               gflush = false;
                }
-#endif
 
                asm volatile("ptesync": : :"memory");
                if (local) {
                        __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbiel_va_range(hstart, hend, pid,
-                                               HPAGE_PMD_SIZE, MMU_PAGE_2M);
+                                               PMD_SIZE, MMU_PAGE_2M);
+                       if (gflush)
+                               __tlbiel_va_range(gstart, gend, pid,
+                                               PUD_SIZE, MMU_PAGE_1G);
                        asm volatile("ptesync": : :"memory");
                } else {
                        __tlbie_va_range(start, end, pid, page_size, mmu_virtual_psize);
                        if (hflush)
                                __tlbie_va_range(hstart, hend, pid,
-                                               HPAGE_PMD_SIZE, MMU_PAGE_2M);
+                                               PMD_SIZE, MMU_PAGE_2M);
+                       if (gflush)
+                               __tlbie_va_range(gstart, gend, pid,
+                                               PUD_SIZE, MMU_PAGE_1G);
                        fixup_tlbie();
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                }
        }
        preempt_enable();
 }
+
+void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+                    unsigned long end)
+
+{
+#ifdef CONFIG_HUGETLB_PAGE
+       if (is_vm_hugetlb_page(vma))
+               return radix__flush_hugetlb_tlb_range(vma, start, end);
+#endif
+
+       __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+}
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
 static int radix_get_mmu_psize(int page_size)
@@ -837,6 +859,8 @@ void radix__tlb_flush(struct mmu_gather *tlb)
        int psize = 0;
        struct mm_struct *mm = tlb->mm;
        int page_size = tlb->page_size;
+       unsigned long start = tlb->start;
+       unsigned long end = tlb->end;
 
        /*
         * if page size is not something we understand, do a full mm flush
@@ -847,15 +871,45 @@ void radix__tlb_flush(struct mmu_gather *tlb)
         */
        if (tlb->fullmm) {
                __flush_all_mm(mm, true);
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
+       } else if (mm_tlb_flush_nested(mm)) {
+               /*
+                * If there is a concurrent invalidation that is clearing ptes,
+                * then it's possible this invalidation will miss one of those
+                * cleared ptes and miss flushing the TLB. If this invalidate
+                * returns before the other one flushes TLBs, that can result
+                * in it returning while there are still valid TLBs inside the
+                * range to be invalidated.
+                *
+                * See mm/memory.c:tlb_finish_mmu() for more details.
+                *
+                * The solution to this is ensure the entire range is always
+                * flushed here. The problem for powerpc is that the flushes
+                * are page size specific, so this "forced flush" would not
+                * do the right thing if there are a mix of page sizes in
+                * the range to be invalidated. So use __flush_tlb_range
+                * which invalidates all possible page sizes in the range.
+                *
+                * PWC flush probably is not be required because the core code
+                * shouldn't free page tables in this path, but accounting
+                * for the possibility makes us a bit more robust.
+                *
+                * need_flush_all is an uncommon case because page table
+                * teardown should be done with exclusive locks held (but
+                * after locks are dropped another invalidate could come
+                * in), it could be optimized further if necessary.
+                */
+               if (!tlb->need_flush_all)
+                       __radix__flush_tlb_range(mm, start, end, true);
+               else
+                       radix__flush_all_mm(mm);
+#endif
        } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
                if (!tlb->need_flush_all)
                        radix__flush_tlb_mm(mm);
                else
                        radix__flush_all_mm(mm);
        } else {
-               unsigned long start = tlb->start;
-               unsigned long end = tlb->end;
-
                if (!tlb->need_flush_all)
                        radix__flush_tlb_range_psize(mm, start, end, psize);
                else
@@ -1043,6 +1097,8 @@ extern void radix_kvm_prefetch_workaround(struct mm_struct *mm)
                for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) {
                        if (sib == cpu)
                                continue;
+                       if (!cpu_possible(sib))
+                               continue;
                        if (paca_ptrs[sib]->kvm_hstate.kvm_vcpu)
                                flush = true;
                }
index 0563fd3..480bb02 100644 (file)
@@ -6,36 +6,38 @@
 
 struct css_general_char {
        u64 : 12;
-       u32 dynio : 1;   /* bit 12 */
-       u32 : 4;
-       u32 eadm : 1;    /* bit 17 */
-       u32 : 23;
-       u32 aif : 1;     /* bit 41 */
-       u32 : 3;
-       u32 mcss : 1;    /* bit 45 */
-       u32 fcs : 1;     /* bit 46 */
-       u32 : 1;
-       u32 ext_mb : 1;  /* bit 48 */
-       u32 : 7;
-       u32 aif_tdd : 1; /* bit 56 */
-       u32 : 1;
-       u32 qebsm : 1;   /* bit 58 */
-       u32 : 2;
-       u32 aiv : 1;     /* bit 61 */
-       u32 : 5;
-       u32 aif_osa : 1; /* bit 67 */
-       u32 : 12;
-       u32 eadm_rf : 1; /* bit 80 */
-       u32 : 1;
-       u32 cib : 1;     /* bit 82 */
-       u32 : 5;
-       u32 fcx : 1;     /* bit 88 */
-       u32 : 19;
-       u32 alt_ssi : 1; /* bit 108 */
-       u32 : 1;
-       u32 narf : 1;    /* bit 110 */
-       u32 : 12;
-       u32 util_str : 1;/* bit 123 */
+       u64 dynio : 1;   /* bit 12 */
+       u64 : 4;
+       u64 eadm : 1;    /* bit 17 */
+       u64 : 23;
+       u64 aif : 1;     /* bit 41 */
+       u64 : 3;
+       u64 mcss : 1;    /* bit 45 */
+       u64 fcs : 1;     /* bit 46 */
+       u64 : 1;
+       u64 ext_mb : 1;  /* bit 48 */
+       u64 : 7;
+       u64 aif_tdd : 1; /* bit 56 */
+       u64 : 1;
+       u64 qebsm : 1;   /* bit 58 */
+       u64 : 2;
+       u64 aiv : 1;     /* bit 61 */
+       u64 : 2;
+
+       u64 : 3;
+       u64 aif_osa : 1; /* bit 67 */
+       u64 : 12;
+       u64 eadm_rf : 1; /* bit 80 */
+       u64 : 1;
+       u64 cib : 1;     /* bit 82 */
+       u64 : 5;
+       u64 fcx : 1;     /* bit 88 */
+       u64 : 19;
+       u64 alt_ssi : 1; /* bit 108 */
+       u64 : 1;
+       u64 narf : 1;    /* bit 110 */
+       u64 : 12;
+       u64 util_str : 1;/* bit 123 */
 } __packed;
 
 extern struct css_general_char css_general_characteristics;
index 042b5e8..14de043 100644 (file)
@@ -38,7 +38,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
 {
        unsigned long mask;
 
-       asm ("cmp %1,%2; sbb %0,%0;"
+       asm volatile ("cmp %1,%2; sbb %0,%0;"
                        :"=r" (mask)
                        :"g"(size),"r" (index)
                        :"cc");
index 425e6b8..6aa8499 100644 (file)
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK    0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA                 0x00000020
 #define VMX_MISC_ACTIVITY_HLT                  0x00000040
+#define VMX_MISC_ZERO_LEN_INS                  0x40000000
 
 /* VMFUNC functions */
 #define VMX_VMFUNC_EPTP_SWITCHING               0x00000001
@@ -351,11 +352,13 @@ enum vmcs_field {
 #define VECTORING_INFO_VALID_MASK              INTR_INFO_VALID_MASK
 
 #define INTR_TYPE_EXT_INTR              (0 << 8) /* external interrupt */
+#define INTR_TYPE_RESERVED              (1 << 8) /* reserved */
 #define INTR_TYPE_NMI_INTR             (2 << 8) /* NMI */
 #define INTR_TYPE_HARD_EXCEPTION       (3 << 8) /* processor exception */
 #define INTR_TYPE_SOFT_INTR             (4 << 8) /* software interrupt */
 #define INTR_TYPE_PRIV_SW_EXCEPTION    (5 << 8) /* ICE breakpoint - undocumented */
 #define INTR_TYPE_SOFT_EXCEPTION       (6 << 8) /* software exception */
+#define INTR_TYPE_OTHER_EVENT           (7 << 8) /* other event */
 
 /* GUEST_INTERRUPTIBILITY_INFO flags. */
 #define GUEST_INTR_STATE_STI           0x00000001
index cd0fda1..404df26 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/pgtable.h>
 #include <asm/set_memory.h>
 #include <asm/intel-family.h>
+#include <asm/hypervisor.h>
 
 static void __init spectre_v2_select_mitigation(void);
 static void __init ssb_select_mitigation(void);
@@ -664,6 +665,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
                if (boot_cpu_has(X86_FEATURE_PTI))
                        return sprintf(buf, "Mitigation: PTI\n");
 
+               if (hypervisor_is_type(X86_HYPER_XEN_PV))
+                       return sprintf(buf, "Unknown (XEN PV detected, hypervisor mitigation required)\n");
+
                break;
 
        case X86_BUG_SPECTRE_V1:
index 5bbd06f..f34d89c 100644 (file)
@@ -160,6 +160,11 @@ static struct severity {
                SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
                USER
                ),
+       MCESEV(
+               PANIC, "Data load in unrecoverable area of kernel",
+               SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
+               KERNEL
+               ),
 #endif
        MCESEV(
                PANIC, "Action required: unknown MCACOD",
index e4cf6ff..c102ad5 100644 (file)
@@ -772,23 +772,25 @@ EXPORT_SYMBOL_GPL(machine_check_poll);
 static int mce_no_way_out(struct mce *m, char **msg, unsigned long *validp,
                          struct pt_regs *regs)
 {
-       int i, ret = 0;
        char *tmp;
+       int i;
 
        for (i = 0; i < mca_cfg.banks; i++) {
                m->status = mce_rdmsrl(msr_ops.status(i));
-               if (m->status & MCI_STATUS_VAL) {
-                       __set_bit(i, validp);
-                       if (quirk_no_way_out)
-                               quirk_no_way_out(i, m, regs);
-               }
+               if (!(m->status & MCI_STATUS_VAL))
+                       continue;
+
+               __set_bit(i, validp);
+               if (quirk_no_way_out)
+                       quirk_no_way_out(i, m, regs);
 
                if (mce_severity(m, mca_cfg.tolerant, &tmp, true) >= MCE_PANIC_SEVERITY) {
+                       mce_read_aux(m, i);
                        *msg = tmp;
-                       ret = 1;
+                       return 1;
                }
        }
-       return ret;
+       return 0;
 }
 
 /*
@@ -1205,13 +1207,18 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                lmce = m.mcgstatus & MCG_STATUS_LMCES;
 
        /*
+        * Local machine check may already know that we have to panic.
+        * Broadcast machine check begins rendezvous in mce_start()
         * Go through all banks in exclusion of the other CPUs. This way we
         * don't report duplicated events on shared banks because the first one
-        * to see it will clear it. If this is a Local MCE, then no need to
-        * perform rendezvous.
+        * to see it will clear it.
         */
-       if (!lmce)
+       if (lmce) {
+               if (no_way_out)
+                       mce_panic("Fatal local machine check", &m, msg);
+       } else {
                order = mce_start(&no_way_out);
+       }
 
        for (i = 0; i < cfg->banks; i++) {
                __clear_bit(i, toclear);
@@ -1287,12 +1294,17 @@ void do_machine_check(struct pt_regs *regs, long error_code)
                        no_way_out = worst >= MCE_PANIC_SEVERITY;
        } else {
                /*
-                * Local MCE skipped calling mce_reign()
-                * If we found a fatal error, we need to panic here.
+                * If there was a fatal machine check we should have
+                * already called mce_panic earlier in this function.
+                * Since we re-read the banks, we might have found
+                * something new. Check again to see if we found a
+                * fatal error. We call "mce_severity()" again to
+                * make sure we have the right "msg".
                 */
-                if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3)
-                       mce_panic("Machine check from unknown source",
-                               NULL, NULL);
+               if (worst >= MCE_PANIC_SEVERITY && mca_cfg.tolerant < 3) {
+                       mce_severity(&m, cfg->tolerant, &msg, true);
+                       mce_panic("Local fatal machine check!", &m, msg);
+               }
        }
 
        /*
index 697a4ce..736348e 100644 (file)
@@ -645,12 +645,19 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 /* Skylake */
 static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 {
-       u32 capid0;
+       u32 capid0, capid5;
 
        pci_read_config_dword(pdev, 0x84, &capid0);
+       pci_read_config_dword(pdev, 0x98, &capid5);
 
-       if ((capid0 & 0xc0) == 0xc0)
+       /*
+        * CAPID0{7:6} indicate whether this is an advanced RAS SKU
+        * CAPID5{8:5} indicate that various NVDIMM usage modes are
+        * enabled, so memory machine check recovery is also enabled.
+        */
+       if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
                static_branch_inc(&mcsafe_key);
+
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ras_cap);
index 559a12b..1689f43 100644 (file)
@@ -1705,6 +1705,17 @@ static inline bool nested_cpu_has_vmwrite_any_field(struct kvm_vcpu *vcpu)
                MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS;
 }
 
+static inline bool nested_cpu_has_zero_length_injection(struct kvm_vcpu *vcpu)
+{
+       return to_vmx(vcpu)->nested.msrs.misc_low & VMX_MISC_ZERO_LEN_INS;
+}
+
+static inline bool nested_cpu_supports_monitor_trap_flag(struct kvm_vcpu *vcpu)
+{
+       return to_vmx(vcpu)->nested.msrs.procbased_ctls_high &
+                       CPU_BASED_MONITOR_TRAP_FLAG;
+}
+
 static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
 {
        return vmcs12->cpu_based_vm_exec_control & bit;
@@ -11620,6 +11631,62 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
            !nested_cr3_valid(vcpu, vmcs12->host_cr3))
                return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD;
 
+       /*
+        * From the Intel SDM, volume 3:
+        * Fields relevant to VM-entry event injection must be set properly.
+        * These fields are the VM-entry interruption-information field, the
+        * VM-entry exception error code, and the VM-entry instruction length.
+        */
+       if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
+               u32 intr_info = vmcs12->vm_entry_intr_info_field;
+               u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
+               u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
+               bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
+               bool should_have_error_code;
+               bool urg = nested_cpu_has2(vmcs12,
+                                          SECONDARY_EXEC_UNRESTRICTED_GUEST);
+               bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
+
+               /* VM-entry interruption-info field: interruption type */
+               if (intr_type == INTR_TYPE_RESERVED ||
+                   (intr_type == INTR_TYPE_OTHER_EVENT &&
+                    !nested_cpu_supports_monitor_trap_flag(vcpu)))
+                       return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+               /* VM-entry interruption-info field: vector */
+               if ((intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
+                   (intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
+                   (intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
+                       return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+               /* VM-entry interruption-info field: deliver error code */
+               should_have_error_code =
+                       intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
+                       x86_exception_has_error_code(vector);
+               if (has_error_code != should_have_error_code)
+                       return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+               /* VM-entry exception error code */
+               if (has_error_code &&
+                   vmcs12->vm_entry_exception_error_code & GENMASK(31, 15))
+                       return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+               /* VM-entry interruption-info field: reserved bits */
+               if (intr_info & INTR_INFO_RESVD_BITS_MASK)
+                       return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
+               /* VM-entry instruction length */
+               switch (intr_type) {
+               case INTR_TYPE_SOFT_EXCEPTION:
+               case INTR_TYPE_SOFT_INTR:
+               case INTR_TYPE_PRIV_SW_EXCEPTION:
+                       if ((vmcs12->vm_entry_instruction_len > 15) ||
+                           (vmcs12->vm_entry_instruction_len == 0 &&
+                            !nested_cpu_has_zero_length_injection(vcpu)))
+                               return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+               }
+       }
+
        return 0;
 }
 
index 331993c..257f276 100644 (file)
@@ -110,6 +110,15 @@ static inline bool is_la57_mode(struct kvm_vcpu *vcpu)
 #endif
 }
 
+static inline bool x86_exception_has_error_code(unsigned int vector)
+{
+       static u32 exception_has_error_code = BIT(DF_VECTOR) | BIT(TS_VECTOR) |
+                       BIT(NP_VECTOR) | BIT(SS_VECTOR) | BIT(GP_VECTOR) |
+                       BIT(PF_VECTOR) | BIT(AC_VECTOR);
+
+       return (1U << vector) & exception_has_error_code;
+}
+
 static inline bool mmu_is_nested(struct kvm_vcpu *vcpu)
 {
        return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu;
index c9081c6..3b53185 100644 (file)
@@ -64,6 +64,13 @@ struct shared_info xen_dummy_shared_info;
 __read_mostly int xen_have_vector_callback;
 EXPORT_SYMBOL_GPL(xen_have_vector_callback);
 
+/*
+ * NB: needs to live in .data because it's used by xen_prepare_pvh which runs
+ * before clearing the bss.
+ */
+uint32_t xen_start_flags __attribute__((section(".data"))) = 0;
+EXPORT_SYMBOL(xen_start_flags);
+
 /*
  * Point at some empty memory to start with. We map the real shared_info
  * page as soon as fixmap is up and running.
index 357969a..8d4e2e1 100644 (file)
@@ -1203,6 +1203,7 @@ asmlinkage __visible void __init xen_start_kernel(void)
                return;
 
        xen_domain_type = XEN_PV_DOMAIN;
+       xen_start_flags = xen_start_info->flags;
 
        xen_setup_features();
 
index aa1c6a6..c85d1a8 100644 (file)
@@ -97,6 +97,7 @@ void __init xen_prepare_pvh(void)
        }
 
        xen_pvh = 1;
+       xen_start_flags = pvh_start_info.flags;
 
        msr = cpuid_ebx(xen_cpuid_base() + 2);
        pfn = __pa(hypercall_page);
index 9710e27..67eff5e 100644 (file)
@@ -1807,9 +1807,6 @@ again:
        if (!bio_integrity_endio(bio))
                return;
 
-       if (WARN_ONCE(bio->bi_next, "driver left bi_next not NULL"))
-               bio->bi_next = NULL;
-
        /*
         * Need to have a real endio function for chained bios, otherwise
         * various corner cases will break (like stacking block devices that
index cf0ee76..afd2596 100644 (file)
@@ -273,10 +273,6 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
        bio_advance(bio, nbytes);
 
        /* don't actually finish bio if it's part of flush sequence */
-       /*
-        * XXX this code looks suspicious - it's not consistent with advancing
-        * req->bio in caller
-        */
        if (bio->bi_iter.bi_size == 0 && !(rq->rq_flags & RQF_FLUSH_SEQ))
                bio_endio(bio);
 }
@@ -3081,10 +3077,8 @@ bool blk_update_request(struct request *req, blk_status_t error,
                struct bio *bio = req->bio;
                unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes);
 
-               if (bio_bytes == bio->bi_iter.bi_size) {
+               if (bio_bytes == bio->bi_iter.bi_size)
                        req->bio = bio->bi_next;
-                       bio->bi_next = NULL;
-               }
 
                /* Completion has already been traced */
                bio_clear_flag(bio, BIO_TRACE_COMPLETION);
index ffa6223..1c4532e 100644 (file)
@@ -356,7 +356,7 @@ static const char *const blk_mq_rq_state_name_array[] = {
 
 static const char *blk_mq_rq_state_name(enum mq_rq_state rq_state)
 {
-       if (WARN_ON_ONCE((unsigned int)rq_state >
+       if (WARN_ON_ONCE((unsigned int)rq_state >=
                         ARRAY_SIZE(blk_mq_rq_state_name_array)))
                return "(?)";
        return blk_mq_rq_state_name_array[rq_state];
index 70c65bb..b429d51 100644 (file)
@@ -781,7 +781,6 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
                WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
        }
 
-       req->rq_flags &= ~RQF_TIMED_OUT;
        blk_add_timer(req);
 }
 
index 01e2b35..15c1f5e 100644 (file)
@@ -144,6 +144,7 @@ do_local:
 
        local_irq_restore(flags);
 }
+EXPORT_SYMBOL(__blk_complete_request);
 
 /**
  * blk_complete_request - end I/O on a request
index 4b8a48d..f2cfd56 100644 (file)
@@ -210,6 +210,7 @@ void blk_add_timer(struct request *req)
        if (!req->timeout)
                req->timeout = q->rq_timeout;
 
+       req->rq_flags &= ~RQF_TIMED_OUT;
        blk_rq_set_deadline(req, jiffies + req->timeout);
 
        /*
index 945f4b8..e0de4dd 100644 (file)
@@ -877,7 +877,7 @@ static size_t response_get_string(const struct parsed_resp *resp, int n,
                return 0;
        }
 
-       if (n > resp->num) {
+       if (n >= resp->num) {
                pr_debug("Response has %d tokens. Can't access %d\n",
                         resp->num, n);
                return 0;
@@ -916,7 +916,7 @@ static u64 response_get_u64(const struct parsed_resp *resp, int n)
                return 0;
        }
 
-       if (n > resp->num) {
+       if (n >= resp->num) {
                pr_debug("Response has %d tokens. Can't access %d\n",
                         resp->num, n);
                return 0;
index 9fbcde3..5eede37 100644 (file)
@@ -274,8 +274,9 @@ static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
                union morus640_block_in tail;
 
                memcpy(tail.bytes, src, size);
+               memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
 
-               crypto_morus640_load_a(&m, src);
+               crypto_morus640_load_a(&m, tail.bytes);
                crypto_morus640_core(state, &m);
                crypto_morus640_store_a(tail.bytes, &m);
                memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
index 264ec12..7f6735d 100644 (file)
@@ -152,7 +152,7 @@ static SHA3_INLINE void keccakf_round(u64 st[25])
        st[24] ^= bc[ 4];
 }
 
-static void __optimize("O3") keccakf(u64 st[25])
+static void keccakf(u64 st[25])
 {
        int round;
 
index 38a2869..f8fecfe 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/pm_domain.h>
 #include <linux/pm_runtime.h>
 #include <linux/pwm.h>
+#include <linux/suspend.h>
 #include <linux/delay.h>
 
 #include "internal.h"
@@ -946,9 +947,10 @@ static void lpss_iosf_exit_d3_state(void)
        mutex_unlock(&lpss_iosf_mutex);
 }
 
-static int acpi_lpss_suspend(struct device *dev, bool wakeup)
+static int acpi_lpss_suspend(struct device *dev, bool runtime)
 {
        struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
+       bool wakeup = runtime || device_may_wakeup(dev);
        int ret;
 
        if (pdata->dev_desc->flags & LPSS_SAVE_CTX)
@@ -961,13 +963,14 @@ static int acpi_lpss_suspend(struct device *dev, bool wakeup)
         * wrong status for devices being about to be powered off. See
         * lpss_iosf_enter_d3_state() for further information.
         */
-       if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
+       if ((runtime || !pm_suspend_via_firmware()) &&
+           lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
                lpss_iosf_enter_d3_state();
 
        return ret;
 }
 
-static int acpi_lpss_resume(struct device *dev)
+static int acpi_lpss_resume(struct device *dev, bool runtime)
 {
        struct lpss_private_data *pdata = acpi_driver_data(ACPI_COMPANION(dev));
        int ret;
@@ -976,7 +979,8 @@ static int acpi_lpss_resume(struct device *dev)
         * This call is kept first to be in symmetry with
         * acpi_lpss_runtime_suspend() one.
         */
-       if (lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
+       if ((runtime || !pm_resume_via_firmware()) &&
+           lpss_quirks & LPSS_QUIRK_ALWAYS_POWER_ON && iosf_mbi_available())
                lpss_iosf_exit_d3_state();
 
        ret = acpi_dev_resume(dev);
@@ -1000,12 +1004,12 @@ static int acpi_lpss_suspend_late(struct device *dev)
                return 0;
 
        ret = pm_generic_suspend_late(dev);
-       return ret ? ret : acpi_lpss_suspend(dev, device_may_wakeup(dev));
+       return ret ? ret : acpi_lpss_suspend(dev, false);
 }
 
 static int acpi_lpss_resume_early(struct device *dev)
 {
-       int ret = acpi_lpss_resume(dev);
+       int ret = acpi_lpss_resume(dev, false);
 
        return ret ? ret : pm_generic_resume_early(dev);
 }
@@ -1020,7 +1024,7 @@ static int acpi_lpss_runtime_suspend(struct device *dev)
 
 static int acpi_lpss_runtime_resume(struct device *dev)
 {
-       int ret = acpi_lpss_resume(dev);
+       int ret = acpi_lpss_resume(dev, true);
 
        return ret ? ret : pm_generic_runtime_resume(dev);
 }
index bb94cf0..442a9e2 100644 (file)
@@ -2037,6 +2037,17 @@ static inline void acpi_ec_query_exit(void)
        }
 }
 
+static const struct dmi_system_id acpi_ec_no_wakeup[] = {
+       {
+               .ident = "Thinkpad X1 Carbon 6th",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "20KGS3JF01"),
+               },
+       },
+       { },
+};
+
 int __init acpi_ec_init(void)
 {
        int result;
@@ -2047,6 +2058,15 @@ int __init acpi_ec_init(void)
        if (result)
                return result;
 
+       /*
+        * Disable EC wakeup on following systems to prevent periodic
+        * wakeup from EC GPE.
+        */
+       if (dmi_check_system(acpi_ec_no_wakeup)) {
+               ec_no_wakeup = true;
+               pr_debug("Disabling EC wakeup on suspend-to-idle\n");
+       }
+
        /* Drivers must be started after acpi_ec_query_init() */
        dsdt_fail = acpi_bus_register_driver(&acpi_ec_driver);
        /*
index b074f24..704f442 100644 (file)
@@ -8,10 +8,7 @@ obj-y                  := component.o core.o bus.o dd.o syscore.o \
                           topology.o container.o property.o cacheinfo.o \
                           devcon.o
 obj-$(CONFIG_DEVTMPFS) += devtmpfs.o
-obj-$(CONFIG_DMA_CMA) += dma-contiguous.o
 obj-y                  += power/
-obj-$(CONFIG_HAS_DMA)  += dma-mapping.o
-obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += dma-coherent.o
 obj-$(CONFIG_ISA_BUS_API)      += isa.o
 obj-y                          += firmware_loader/
 obj-$(CONFIG_NUMA)     += node.o
index 36622b5..df3e1a4 100644 (file)
@@ -236,6 +236,13 @@ struct device_link *device_link_add(struct device *consumer,
                        link->rpm_active = true;
                }
                pm_runtime_new_link(consumer);
+               /*
+                * If the link is being added by the consumer driver at probe
+                * time, balance the decrementation of the supplier's runtime PM
+                * usage counter after consumer probe in driver_probe_device().
+                */
+               if (consumer->links.status == DL_DEV_PROBING)
+                       pm_runtime_get_noresume(supplier);
        }
        get_device(supplier);
        link->supplier = supplier;
@@ -255,12 +262,12 @@ struct device_link *device_link_add(struct device *consumer,
                        switch (consumer->links.status) {
                        case DL_DEV_PROBING:
                                /*
-                                * Balance the decrementation of the supplier's
-                                * runtime PM usage counter after consumer probe
-                                * in driver_probe_device().
+                                * Some callers expect the link creation during
+                                * consumer driver probe to resume the supplier
+                                * even without DL_FLAG_RPM_ACTIVE.
                                 */
                                if (flags & DL_FLAG_PM_RUNTIME)
-                                       pm_runtime_get_sync(supplier);
+                                       pm_runtime_resume(supplier);
 
                                link->status = DL_STATE_CONSUMER_PROBE;
                                break;
diff --git a/drivers/base/dma-coherent.c b/drivers/base/dma-coherent.c
deleted file mode 100644 (file)
index 597d408..0000000
+++ /dev/null
@@ -1,434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Coherent per-device memory handling.
- * Borrowed from i386
- */
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/dma-mapping.h>
-
-struct dma_coherent_mem {
-       void            *virt_base;
-       dma_addr_t      device_base;
-       unsigned long   pfn_base;
-       int             size;
-       int             flags;
-       unsigned long   *bitmap;
-       spinlock_t      spinlock;
-       bool            use_dev_dma_pfn_offset;
-};
-
-static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
-
-static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev)
-{
-       if (dev && dev->dma_mem)
-               return dev->dma_mem;
-       return NULL;
-}
-
-static inline dma_addr_t dma_get_device_base(struct device *dev,
-                                            struct dma_coherent_mem * mem)
-{
-       if (mem->use_dev_dma_pfn_offset)
-               return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
-       else
-               return mem->device_base;
-}
-
-static int dma_init_coherent_memory(
-       phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
-       struct dma_coherent_mem **mem)
-{
-       struct dma_coherent_mem *dma_mem = NULL;
-       void __iomem *mem_base = NULL;
-       int pages = size >> PAGE_SHIFT;
-       int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
-       int ret;
-
-       if (!size) {
-               ret = -EINVAL;
-               goto out;
-       }
-
-       mem_base = memremap(phys_addr, size, MEMREMAP_WC);
-       if (!mem_base) {
-               ret = -EINVAL;
-               goto out;
-       }
-       dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
-       if (!dma_mem) {
-               ret = -ENOMEM;
-               goto out;
-       }
-       dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
-       if (!dma_mem->bitmap) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
-       dma_mem->virt_base = mem_base;
-       dma_mem->device_base = device_addr;
-       dma_mem->pfn_base = PFN_DOWN(phys_addr);
-       dma_mem->size = pages;
-       dma_mem->flags = flags;
-       spin_lock_init(&dma_mem->spinlock);
-
-       *mem = dma_mem;
-       return 0;
-
-out:
-       kfree(dma_mem);
-       if (mem_base)
-               memunmap(mem_base);
-       return ret;
-}
-
-static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
-{
-       if (!mem)
-               return;
-
-       memunmap(mem->virt_base);
-       kfree(mem->bitmap);
-       kfree(mem);
-}
-
-static int dma_assign_coherent_memory(struct device *dev,
-                                     struct dma_coherent_mem *mem)
-{
-       if (!dev)
-               return -ENODEV;
-
-       if (dev->dma_mem)
-               return -EBUSY;
-
-       dev->dma_mem = mem;
-       return 0;
-}
-
-int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-                               dma_addr_t device_addr, size_t size, int flags)
-{
-       struct dma_coherent_mem *mem;
-       int ret;
-
-       ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
-       if (ret)
-               return ret;
-
-       ret = dma_assign_coherent_memory(dev, mem);
-       if (ret)
-               dma_release_coherent_memory(mem);
-       return ret;
-}
-EXPORT_SYMBOL(dma_declare_coherent_memory);
-
-void dma_release_declared_memory(struct device *dev)
-{
-       struct dma_coherent_mem *mem = dev->dma_mem;
-
-       if (!mem)
-               return;
-       dma_release_coherent_memory(mem);
-       dev->dma_mem = NULL;
-}
-EXPORT_SYMBOL(dma_release_declared_memory);
-
-void *dma_mark_declared_memory_occupied(struct device *dev,
-                                       dma_addr_t device_addr, size_t size)
-{
-       struct dma_coherent_mem *mem = dev->dma_mem;
-       unsigned long flags;
-       int pos, err;
-
-       size += device_addr & ~PAGE_MASK;
-
-       if (!mem)
-               return ERR_PTR(-EINVAL);
-
-       spin_lock_irqsave(&mem->spinlock, flags);
-       pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
-       err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
-       spin_unlock_irqrestore(&mem->spinlock, flags);
-
-       if (err != 0)
-               return ERR_PTR(err);
-       return mem->virt_base + (pos << PAGE_SHIFT);
-}
-EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
-
-static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
-               ssize_t size, dma_addr_t *dma_handle)
-{
-       int order = get_order(size);
-       unsigned long flags;
-       int pageno;
-       void *ret;
-
-       spin_lock_irqsave(&mem->spinlock, flags);
-
-       if (unlikely(size > (mem->size << PAGE_SHIFT)))
-               goto err;
-
-       pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
-       if (unlikely(pageno < 0))
-               goto err;
-
-       /*
-        * Memory was found in the coherent area.
-        */
-       *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
-       ret = mem->virt_base + (pageno << PAGE_SHIFT);
-       spin_unlock_irqrestore(&mem->spinlock, flags);
-       memset(ret, 0, size);
-       return ret;
-err:
-       spin_unlock_irqrestore(&mem->spinlock, flags);
-       return NULL;
-}
-
-/**
- * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool
- * @dev:       device from which we allocate memory
- * @size:      size of requested memory area
- * @dma_handle:        This will be filled with the correct dma handle
- * @ret:       This pointer will be filled with the virtual address
- *             to allocated area.
- *
- * This function should be only called from per-arch dma_alloc_coherent()
- * to support allocation from per-device coherent memory pools.
- *
- * Returns 0 if dma_alloc_coherent should continue with allocating from
- * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
- */
-int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
-               dma_addr_t *dma_handle, void **ret)
-{
-       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
-
-       if (!mem)
-               return 0;
-
-       *ret = __dma_alloc_from_coherent(mem, size, dma_handle);
-       if (*ret)
-               return 1;
-
-       /*
-        * In the case where the allocation can not be satisfied from the
-        * per-device area, try to fall back to generic memory if the
-        * constraints allow it.
-        */
-       return mem->flags & DMA_MEMORY_EXCLUSIVE;
-}
-EXPORT_SYMBOL(dma_alloc_from_dev_coherent);
-
-void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
-{
-       if (!dma_coherent_default_memory)
-               return NULL;
-
-       return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
-                       dma_handle);
-}
-
-static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
-                                      int order, void *vaddr)
-{
-       if (mem && vaddr >= mem->virt_base && vaddr <
-                  (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-               unsigned long flags;
-
-               spin_lock_irqsave(&mem->spinlock, flags);
-               bitmap_release_region(mem->bitmap, page, order);
-               spin_unlock_irqrestore(&mem->spinlock, flags);
-               return 1;
-       }
-       return 0;
-}
-
-/**
- * dma_release_from_dev_coherent() - free memory to device coherent memory pool
- * @dev:       device from which the memory was allocated
- * @order:     the order of pages allocated
- * @vaddr:     virtual address of allocated pages
- *
- * This checks whether the memory was allocated from the per-device
- * coherent memory pool and if so, releases that memory.
- *
- * Returns 1 if we correctly released the memory, or 0 if the caller should
- * proceed with releasing memory from generic pools.
- */
-int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
-{
-       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
-
-       return __dma_release_from_coherent(mem, order, vaddr);
-}
-EXPORT_SYMBOL(dma_release_from_dev_coherent);
-
-int dma_release_from_global_coherent(int order, void *vaddr)
-{
-       if (!dma_coherent_default_memory)
-               return 0;
-
-       return __dma_release_from_coherent(dma_coherent_default_memory, order,
-                       vaddr);
-}
-
-static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
-               struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
-{
-       if (mem && vaddr >= mem->virt_base && vaddr + size <=
-                  (mem->virt_base + (mem->size << PAGE_SHIFT))) {
-               unsigned long off = vma->vm_pgoff;
-               int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
-               int user_count = vma_pages(vma);
-               int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-               *ret = -ENXIO;
-               if (off < count && user_count <= count - off) {
-                       unsigned long pfn = mem->pfn_base + start + off;
-                       *ret = remap_pfn_range(vma, vma->vm_start, pfn,
-                                              user_count << PAGE_SHIFT,
-                                              vma->vm_page_prot);
-               }
-               return 1;
-       }
-       return 0;
-}
-
-/**
- * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool
- * @dev:       device from which the memory was allocated
- * @vma:       vm_area for the userspace memory
- * @vaddr:     cpu address returned by dma_alloc_from_dev_coherent
- * @size:      size of the memory buffer allocated
- * @ret:       result from remap_pfn_range()
- *
- * This checks whether the memory was allocated from the per-device
- * coherent memory pool and if so, maps that memory to the provided vma.
- *
- * Returns 1 if @vaddr belongs to the device coherent pool and the caller
- * should return @ret, or 0 if they should proceed with mapping memory from
- * generic areas.
- */
-int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
-                          void *vaddr, size_t size, int *ret)
-{
-       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
-
-       return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
-}
-EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
-
-int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
-                                  size_t size, int *ret)
-{
-       if (!dma_coherent_default_memory)
-               return 0;
-
-       return __dma_mmap_from_coherent(dma_coherent_default_memory, vma,
-                                       vaddr, size, ret);
-}
-
-/*
- * Support for reserved memory regions defined in device tree
- */
-#ifdef CONFIG_OF_RESERVED_MEM
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_reserved_mem.h>
-
-static struct reserved_mem *dma_reserved_default_memory __initdata;
-
-static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
-{
-       struct dma_coherent_mem *mem = rmem->priv;
-       int ret;
-
-       if (!mem) {
-               ret = dma_init_coherent_memory(rmem->base, rmem->base,
-                                              rmem->size,
-                                              DMA_MEMORY_EXCLUSIVE, &mem);
-               if (ret) {
-                       pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
-                               &rmem->base, (unsigned long)rmem->size / SZ_1M);
-                       return ret;
-               }
-       }
-       mem->use_dev_dma_pfn_offset = true;
-       rmem->priv = mem;
-       dma_assign_coherent_memory(dev, mem);
-       return 0;
-}
-
-static void rmem_dma_device_release(struct reserved_mem *rmem,
-                                   struct device *dev)
-{
-       if (dev)
-               dev->dma_mem = NULL;
-}
-
-static const struct reserved_mem_ops rmem_dma_ops = {
-       .device_init    = rmem_dma_device_init,
-       .device_release = rmem_dma_device_release,
-};
-
-static int __init rmem_dma_setup(struct reserved_mem *rmem)
-{
-       unsigned long node = rmem->fdt_node;
-
-       if (of_get_flat_dt_prop(node, "reusable", NULL))
-               return -EINVAL;
-
-#ifdef CONFIG_ARM
-       if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
-               pr_err("Reserved memory: regions without no-map are not yet supported\n");
-               return -EINVAL;
-       }
-
-       if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) {
-               WARN(dma_reserved_default_memory,
-                    "Reserved memory: region for default DMA coherent area is redefined\n");
-               dma_reserved_default_memory = rmem;
-       }
-#endif
-
-       rmem->ops = &rmem_dma_ops;
-       pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
-               &rmem->base, (unsigned long)rmem->size / SZ_1M);
-       return 0;
-}
-
-static int __init dma_init_reserved_memory(void)
-{
-       const struct reserved_mem_ops *ops;
-       int ret;
-
-       if (!dma_reserved_default_memory)
-               return -ENOMEM;
-
-       ops = dma_reserved_default_memory->ops;
-
-       /*
-        * We rely on rmem_dma_device_init() does not propagate error of
-        * dma_assign_coherent_memory() for "NULL" device.
-        */
-       ret = ops->device_init(dma_reserved_default_memory, NULL);
-
-       if (!ret) {
-               dma_coherent_default_memory = dma_reserved_default_memory->priv;
-               pr_info("DMA: default coherent area is set\n");
-       }
-
-       return ret;
-}
-
-core_initcall(dma_init_reserved_memory);
-
-RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
-#endif
diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c
deleted file mode 100644 (file)
index d987dcd..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Contiguous Memory Allocator for DMA mapping framework
- * Copyright (c) 2010-2011 by Samsung Electronics.
- * Written by:
- *     Marek Szyprowski <m.szyprowski@samsung.com>
- *     Michal Nazarewicz <mina86@mina86.com>
- */
-
-#define pr_fmt(fmt) "cma: " fmt
-
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-#  define DEBUG
-#endif
-#endif
-
-#include <asm/page.h>
-#include <asm/dma-contiguous.h>
-
-#include <linux/memblock.h>
-#include <linux/err.h>
-#include <linux/sizes.h>
-#include <linux/dma-contiguous.h>
-#include <linux/cma.h>
-
-#ifdef CONFIG_CMA_SIZE_MBYTES
-#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
-#else
-#define CMA_SIZE_MBYTES 0
-#endif
-
-struct cma *dma_contiguous_default_area;
-
-/*
- * Default global CMA area size can be defined in kernel's .config.
- * This is useful mainly for distro maintainers to create a kernel
- * that works correctly for most supported systems.
- * The size can be set in bytes or as a percentage of the total memory
- * in the system.
- *
- * Users, who want to set the size of global CMA area for their system
- * should use cma= kernel parameter.
- */
-static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
-static phys_addr_t size_cmdline = -1;
-static phys_addr_t base_cmdline;
-static phys_addr_t limit_cmdline;
-
-static int __init early_cma(char *p)
-{
-       pr_debug("%s(%s)\n", __func__, p);
-       size_cmdline = memparse(p, &p);
-       if (*p != '@')
-               return 0;
-       base_cmdline = memparse(p + 1, &p);
-       if (*p != '-') {
-               limit_cmdline = base_cmdline + size_cmdline;
-               return 0;
-       }
-       limit_cmdline = memparse(p + 1, &p);
-
-       return 0;
-}
-early_param("cma", early_cma);
-
-#ifdef CONFIG_CMA_SIZE_PERCENTAGE
-
-static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
-{
-       struct memblock_region *reg;
-       unsigned long total_pages = 0;
-
-       /*
-        * We cannot use memblock_phys_mem_size() here, because
-        * memblock_analyze() has not been called yet.
-        */
-       for_each_memblock(memory, reg)
-               total_pages += memblock_region_memory_end_pfn(reg) -
-                              memblock_region_memory_base_pfn(reg);
-
-       return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
-}
-
-#else
-
-static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
-{
-       return 0;
-}
-
-#endif
-
-/**
- * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
- * @limit: End address of the reserved memory (optional, 0 for any).
- *
- * This function reserves memory from early allocator. It should be
- * called by arch specific code once the early allocator (memblock or bootmem)
- * has been activated and all other subsystems have already allocated/reserved
- * memory.
- */
-void __init dma_contiguous_reserve(phys_addr_t limit)
-{
-       phys_addr_t selected_size = 0;
-       phys_addr_t selected_base = 0;
-       phys_addr_t selected_limit = limit;
-       bool fixed = false;
-
-       pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
-
-       if (size_cmdline != -1) {
-               selected_size = size_cmdline;
-               selected_base = base_cmdline;
-               selected_limit = min_not_zero(limit_cmdline, limit);
-               if (base_cmdline + size_cmdline == limit_cmdline)
-                       fixed = true;
-       } else {
-#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
-               selected_size = size_bytes;
-#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
-               selected_size = cma_early_percent_memory();
-#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
-               selected_size = min(size_bytes, cma_early_percent_memory());
-#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
-               selected_size = max(size_bytes, cma_early_percent_memory());
-#endif
-       }
-
-       if (selected_size && !dma_contiguous_default_area) {
-               pr_debug("%s: reserving %ld MiB for global area\n", __func__,
-                        (unsigned long)selected_size / SZ_1M);
-
-               dma_contiguous_reserve_area(selected_size, selected_base,
-                                           selected_limit,
-                                           &dma_contiguous_default_area,
-                                           fixed);
-       }
-}
-
-/**
- * dma_contiguous_reserve_area() - reserve custom contiguous area
- * @size: Size of the reserved area (in bytes),
- * @base: Base address of the reserved area optional, use 0 for any
- * @limit: End address of the reserved memory (optional, 0 for any).
- * @res_cma: Pointer to store the created cma region.
- * @fixed: hint about where to place the reserved area
- *
- * This function reserves memory from early allocator. It should be
- * called by arch specific code once the early allocator (memblock or bootmem)
- * has been activated and all other subsystems have already allocated/reserved
- * memory. This function allows to create custom reserved areas for specific
- * devices.
- *
- * If @fixed is true, reserve contiguous area at exactly @base.  If false,
- * reserve in range from @base to @limit.
- */
-int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
-                                      phys_addr_t limit, struct cma **res_cma,
-                                      bool fixed)
-{
-       int ret;
-
-       ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed,
-                                       "reserved", res_cma);
-       if (ret)
-               return ret;
-
-       /* Architecture specific contiguous memory fixup. */
-       dma_contiguous_early_fixup(cma_get_base(*res_cma),
-                               cma_get_size(*res_cma));
-
-       return 0;
-}
-
-/**
- * dma_alloc_from_contiguous() - allocate pages from contiguous area
- * @dev:   Pointer to device for which the allocation is performed.
- * @count: Requested number of pages.
- * @align: Requested alignment of pages (in PAGE_SIZE order).
- * @gfp_mask: GFP flags to use for this allocation.
- *
- * This function allocates memory buffer for specified device. It uses
- * device specific contiguous memory area if available or the default
- * global one. Requires architecture specific dev_get_cma_area() helper
- * function.
- */
-struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
-                                      unsigned int align, gfp_t gfp_mask)
-{
-       if (align > CONFIG_CMA_ALIGNMENT)
-               align = CONFIG_CMA_ALIGNMENT;
-
-       return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
-}
-
-/**
- * dma_release_from_contiguous() - release allocated pages
- * @dev:   Pointer to device for which the pages were allocated.
- * @pages: Allocated pages.
- * @count: Number of allocated pages.
- *
- * This function releases memory allocated by dma_alloc_from_contiguous().
- * It returns false when provided pages do not belong to contiguous area and
- * true otherwise.
- */
-bool dma_release_from_contiguous(struct device *dev, struct page *pages,
-                                int count)
-{
-       return cma_release(dev_get_cma_area(dev), pages, count);
-}
-
-/*
- * Support for reserved memory regions defined in device tree
- */
-#ifdef CONFIG_OF_RESERVED_MEM
-#include <linux/of.h>
-#include <linux/of_fdt.h>
-#include <linux/of_reserved_mem.h>
-
-#undef pr_fmt
-#define pr_fmt(fmt) fmt
-
-static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
-{
-       dev_set_cma_area(dev, rmem->priv);
-       return 0;
-}
-
-static void rmem_cma_device_release(struct reserved_mem *rmem,
-                                   struct device *dev)
-{
-       dev_set_cma_area(dev, NULL);
-}
-
-static const struct reserved_mem_ops rmem_cma_ops = {
-       .device_init    = rmem_cma_device_init,
-       .device_release = rmem_cma_device_release,
-};
-
-static int __init rmem_cma_setup(struct reserved_mem *rmem)
-{
-       phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
-       phys_addr_t mask = align - 1;
-       unsigned long node = rmem->fdt_node;
-       struct cma *cma;
-       int err;
-
-       if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
-           of_get_flat_dt_prop(node, "no-map", NULL))
-               return -EINVAL;
-
-       if ((rmem->base & mask) || (rmem->size & mask)) {
-               pr_err("Reserved memory: incorrect alignment of CMA region\n");
-               return -EINVAL;
-       }
-
-       err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
-       if (err) {
-               pr_err("Reserved memory: unable to setup CMA region\n");
-               return err;
-       }
-       /* Architecture specific contiguous memory fixup. */
-       dma_contiguous_early_fixup(rmem->base, rmem->size);
-
-       if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
-               dma_contiguous_set_default(cma);
-
-       rmem->ops = &rmem_cma_ops;
-       rmem->priv = cma;
-
-       pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
-               &rmem->base, (unsigned long)rmem->size / SZ_1M);
-
-       return 0;
-}
-RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
-#endif
diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c
deleted file mode 100644 (file)
index f831a58..0000000
+++ /dev/null
@@ -1,345 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * drivers/base/dma-mapping.c - arch-independent dma-mapping routines
- *
- * Copyright (c) 2006  SUSE Linux Products GmbH
- * Copyright (c) 2006  Tejun Heo <teheo@suse.de>
- */
-
-#include <linux/acpi.h>
-#include <linux/dma-mapping.h>
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/of_device.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-
-/*
- * Managed DMA API
- */
-struct dma_devres {
-       size_t          size;
-       void            *vaddr;
-       dma_addr_t      dma_handle;
-       unsigned long   attrs;
-};
-
-static void dmam_release(struct device *dev, void *res)
-{
-       struct dma_devres *this = res;
-
-       dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle,
-                       this->attrs);
-}
-
-static int dmam_match(struct device *dev, void *res, void *match_data)
-{
-       struct dma_devres *this = res, *match = match_data;
-
-       if (this->vaddr == match->vaddr) {
-               WARN_ON(this->size != match->size ||
-                       this->dma_handle != match->dma_handle);
-               return 1;
-       }
-       return 0;
-}
-
-/**
- * dmam_alloc_coherent - Managed dma_alloc_coherent()
- * @dev: Device to allocate coherent memory for
- * @size: Size of allocation
- * @dma_handle: Out argument for allocated DMA handle
- * @gfp: Allocation flags
- *
- * Managed dma_alloc_coherent().  Memory allocated using this function
- * will be automatically released on driver detach.
- *
- * RETURNS:
- * Pointer to allocated memory on success, NULL on failure.
- */
-void *dmam_alloc_coherent(struct device *dev, size_t size,
-                          dma_addr_t *dma_handle, gfp_t gfp)
-{
-       struct dma_devres *dr;
-       void *vaddr;
-
-       dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
-       if (!dr)
-               return NULL;
-
-       vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp);
-       if (!vaddr) {
-               devres_free(dr);
-               return NULL;
-       }
-
-       dr->vaddr = vaddr;
-       dr->dma_handle = *dma_handle;
-       dr->size = size;
-
-       devres_add(dev, dr);
-
-       return vaddr;
-}
-EXPORT_SYMBOL(dmam_alloc_coherent);
-
-/**
- * dmam_free_coherent - Managed dma_free_coherent()
- * @dev: Device to free coherent memory for
- * @size: Size of allocation
- * @vaddr: Virtual address of the memory to free
- * @dma_handle: DMA handle of the memory to free
- *
- * Managed dma_free_coherent().
- */
-void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
-                       dma_addr_t dma_handle)
-{
-       struct dma_devres match_data = { size, vaddr, dma_handle };
-
-       dma_free_coherent(dev, size, vaddr, dma_handle);
-       WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
-}
-EXPORT_SYMBOL(dmam_free_coherent);
-
-/**
- * dmam_alloc_attrs - Managed dma_alloc_attrs()
- * @dev: Device to allocate non_coherent memory for
- * @size: Size of allocation
- * @dma_handle: Out argument for allocated DMA handle
- * @gfp: Allocation flags
- * @attrs: Flags in the DMA_ATTR_* namespace.
- *
- * Managed dma_alloc_attrs().  Memory allocated using this function will be
- * automatically released on driver detach.
- *
- * RETURNS:
- * Pointer to allocated memory on success, NULL on failure.
- */
-void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       struct dma_devres *dr;
-       void *vaddr;
-
-       dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
-       if (!dr)
-               return NULL;
-
-       vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs);
-       if (!vaddr) {
-               devres_free(dr);
-               return NULL;
-       }
-
-       dr->vaddr = vaddr;
-       dr->dma_handle = *dma_handle;
-       dr->size = size;
-       dr->attrs = attrs;
-
-       devres_add(dev, dr);
-
-       return vaddr;
-}
-EXPORT_SYMBOL(dmam_alloc_attrs);
-
-#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
-
-static void dmam_coherent_decl_release(struct device *dev, void *res)
-{
-       dma_release_declared_memory(dev);
-}
-
-/**
- * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
- * @dev: Device to declare coherent memory for
- * @phys_addr: Physical address of coherent memory to be declared
- * @device_addr: Device address of coherent memory to be declared
- * @size: Size of coherent memory to be declared
- * @flags: Flags
- *
- * Managed dma_declare_coherent_memory().
- *
- * RETURNS:
- * 0 on success, -errno on failure.
- */
-int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
-                                dma_addr_t device_addr, size_t size, int flags)
-{
-       void *res;
-       int rc;
-
-       res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL);
-       if (!res)
-               return -ENOMEM;
-
-       rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
-                                        flags);
-       if (!rc)
-               devres_add(dev, res);
-       else
-               devres_free(res);
-
-       return rc;
-}
-EXPORT_SYMBOL(dmam_declare_coherent_memory);
-
-/**
- * dmam_release_declared_memory - Managed dma_release_declared_memory().
- * @dev: Device to release declared coherent memory for
- *
- * Managed dmam_release_declared_memory().
- */
-void dmam_release_declared_memory(struct device *dev)
-{
-       WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL));
-}
-EXPORT_SYMBOL(dmam_release_declared_memory);
-
-#endif
-
-/*
- * Create scatter-list for the already allocated DMA buffer.
- */
-int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
-                void *cpu_addr, dma_addr_t handle, size_t size)
-{
-       struct page *page = virt_to_page(cpu_addr);
-       int ret;
-
-       ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-       if (unlikely(ret))
-               return ret;
-
-       sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-       return 0;
-}
-EXPORT_SYMBOL(dma_common_get_sgtable);
-
-/*
- * Create userspace mapping for the DMA-coherent memory.
- */
-int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
-                   void *cpu_addr, dma_addr_t dma_addr, size_t size)
-{
-       int ret = -ENXIO;
-#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
-       unsigned long user_count = vma_pages(vma);
-       unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-       unsigned long off = vma->vm_pgoff;
-
-       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-
-       if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
-               return ret;
-
-       if (off < count && user_count <= (count - off))
-               ret = remap_pfn_range(vma, vma->vm_start,
-                                     page_to_pfn(virt_to_page(cpu_addr)) + off,
-                                     user_count << PAGE_SHIFT,
-                                     vma->vm_page_prot);
-#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
-
-       return ret;
-}
-EXPORT_SYMBOL(dma_common_mmap);
-
-#ifdef CONFIG_MMU
-static struct vm_struct *__dma_common_pages_remap(struct page **pages,
-                       size_t size, unsigned long vm_flags, pgprot_t prot,
-                       const void *caller)
-{
-       struct vm_struct *area;
-
-       area = get_vm_area_caller(size, vm_flags, caller);
-       if (!area)
-               return NULL;
-
-       if (map_vm_area(area, prot, pages)) {
-               vunmap(area->addr);
-               return NULL;
-       }
-
-       return area;
-}
-
-/*
- * remaps an array of PAGE_SIZE pages into another vm_area
- * Cannot be used in non-sleeping contexts
- */
-void *dma_common_pages_remap(struct page **pages, size_t size,
-                       unsigned long vm_flags, pgprot_t prot,
-                       const void *caller)
-{
-       struct vm_struct *area;
-
-       area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
-       if (!area)
-               return NULL;
-
-       area->pages = pages;
-
-       return area->addr;
-}
-
-/*
- * remaps an allocated contiguous region into another vm_area.
- * Cannot be used in non-sleeping contexts
- */
-
-void *dma_common_contiguous_remap(struct page *page, size_t size,
-                       unsigned long vm_flags,
-                       pgprot_t prot, const void *caller)
-{
-       int i;
-       struct page **pages;
-       struct vm_struct *area;
-
-       pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
-       if (!pages)
-               return NULL;
-
-       for (i = 0; i < (size >> PAGE_SHIFT); i++)
-               pages[i] = nth_page(page, i);
-
-       area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
-
-       kfree(pages);
-
-       if (!area)
-               return NULL;
-       return area->addr;
-}
-
-/*
- * unmaps a range previously mapped by dma_common_*_remap
- */
-void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
-{
-       struct vm_struct *area = find_vm_area(cpu_addr);
-
-       if (!area || (area->flags & vm_flags) != vm_flags) {
-               WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
-               return;
-       }
-
-       unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
-       vunmap(cpu_addr);
-}
-#endif
-
-/*
- * enables DMA API use for a device
- */
-int dma_configure(struct device *dev)
-{
-       if (dev->bus->dma_configure)
-               return dev->bus->dma_configure(dev);
-       return 0;
-}
-
-void dma_deconfigure(struct device *dev)
-{
-       of_dma_deconfigure(dev);
-       acpi_dma_deconfigure(dev);
-}
index 3b7083b..74a0556 100644 (file)
@@ -76,6 +76,7 @@ struct link_dead_args {
 #define NBD_HAS_CONFIG_REF             4
 #define NBD_BOUND                      5
 #define NBD_DESTROY_ON_DISCONNECT      6
+#define NBD_DISCONNECT_ON_CLOSE        7
 
 struct nbd_config {
        u32 flags;
@@ -138,6 +139,7 @@ static void nbd_config_put(struct nbd_device *nbd);
 static void nbd_connect_reply(struct genl_info *info, int index);
 static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info);
 static void nbd_dead_link_work(struct work_struct *work);
+static void nbd_disconnect_and_put(struct nbd_device *nbd);
 
 static inline struct device *nbd_to_dev(struct nbd_device *nbd)
 {
@@ -1305,6 +1307,12 @@ out:
 static void nbd_release(struct gendisk *disk, fmode_t mode)
 {
        struct nbd_device *nbd = disk->private_data;
+       struct block_device *bdev = bdget_disk(disk, 0);
+
+       if (test_bit(NBD_DISCONNECT_ON_CLOSE, &nbd->config->runtime_flags) &&
+                       bdev->bd_openers == 0)
+               nbd_disconnect_and_put(nbd);
+
        nbd_config_put(nbd);
        nbd_put(nbd);
 }
@@ -1705,6 +1713,10 @@ again:
                                &config->runtime_flags);
                        put_dev = true;
                }
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                               &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {
@@ -1749,6 +1761,17 @@ out:
        return ret;
 }
 
+static void nbd_disconnect_and_put(struct nbd_device *nbd)
+{
+       mutex_lock(&nbd->config_lock);
+       nbd_disconnect(nbd);
+       nbd_clear_sock(nbd);
+       mutex_unlock(&nbd->config_lock);
+       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
+                              &nbd->config->runtime_flags))
+               nbd_config_put(nbd);
+}
+
 static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
 {
        struct nbd_device *nbd;
@@ -1781,13 +1804,7 @@ static int nbd_genl_disconnect(struct sk_buff *skb, struct genl_info *info)
                nbd_put(nbd);
                return 0;
        }
-       mutex_lock(&nbd->config_lock);
-       nbd_disconnect(nbd);
-       nbd_clear_sock(nbd);
-       mutex_unlock(&nbd->config_lock);
-       if (test_and_clear_bit(NBD_HAS_CONFIG_REF,
-                              &nbd->config->runtime_flags))
-               nbd_config_put(nbd);
+       nbd_disconnect_and_put(nbd);
        nbd_config_put(nbd);
        nbd_put(nbd);
        return 0;
@@ -1798,7 +1815,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        struct nbd_device *nbd = NULL;
        struct nbd_config *config;
        int index;
-       int ret = -EINVAL;
+       int ret = 0;
        bool put_dev = false;
 
        if (!netlink_capable(skb, CAP_SYS_ADMIN))
@@ -1838,6 +1855,7 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
            !nbd->task_recv) {
                dev_err(nbd_to_dev(nbd),
                        "not configured, cannot reconfigure\n");
+               ret = -EINVAL;
                goto out;
        }
 
@@ -1862,6 +1880,14 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
                                               &config->runtime_flags))
                                refcount_inc(&nbd->refs);
                }
+
+               if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
+                       set_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               } else {
+                       clear_bit(NBD_DISCONNECT_ON_CLOSE,
+                                       &config->runtime_flags);
+               }
        }
 
        if (info->attrs[NBD_ATTR_SOCKETS]) {
index 7948049..042c778 100644 (file)
@@ -1365,7 +1365,7 @@ static blk_qc_t null_queue_bio(struct request_queue *q, struct bio *bio)
 static enum blk_eh_timer_return null_rq_timed_out_fn(struct request *rq)
 {
        pr_info("null: rq %p timed out\n", rq);
-       blk_mq_complete_request(rq);
+       __blk_complete_request(rq);
        return BLK_EH_DONE;
 }
 
index 14d159e..2dc33e6 100644 (file)
@@ -29,7 +29,7 @@
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/types.h>
-#include <linux/unaligned/le_struct.h>
+#include <asm/unaligned.h>
 #include <net/bluetooth/bluetooth.h>
 #include <net/bluetooth/hci_core.h>
 
index 91bb98c..aaf9e5a 100644 (file)
@@ -516,11 +516,18 @@ EXPORT_SYMBOL_GPL(hwrng_register);
 
 void hwrng_unregister(struct hwrng *rng)
 {
+       int err;
+
        mutex_lock(&rng_mutex);
 
        list_del(&rng->list);
-       if (current_rng == rng)
-               enable_best_rng();
+       if (current_rng == rng) {
+               err = enable_best_rng();
+               if (err) {
+                       drop_current_rng();
+                       cur_rng_set_by_user = 0;
+               }
+       }
 
        if (list_empty(&rng_list)) {
                mutex_unlock(&rng_mutex);
index e5cdc3a..2717f88 100644 (file)
@@ -304,8 +304,10 @@ static int __init stm32_timer_init(struct device_node *node)
 
        to->private_data = kzalloc(sizeof(struct stm32_timer_private),
                                   GFP_KERNEL);
-       if (!to->private_data)
+       if (!to->private_data) {
+               ret = -ENOMEM;
                goto deinit;
+       }
 
        rstc = of_reset_control_get(node, NULL);
        if (!IS_ERR(rstc)) {
index 1de5ec8..ece120d 100644 (file)
@@ -294,6 +294,7 @@ struct pstate_funcs {
 static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
+static int hwp_mode_bdw __read_mostly;
 static bool per_cpu_limits __read_mostly;
 static bool hwp_boost __read_mostly;
 
@@ -1413,7 +1414,15 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
        cpu->pstate.turbo_pstate = pstate_funcs.get_turbo();
        cpu->pstate.scaling = pstate_funcs.get_scaling();
        cpu->pstate.max_freq = cpu->pstate.max_pstate * cpu->pstate.scaling;
-       cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+
+       if (hwp_active && !hwp_mode_bdw) {
+               unsigned int phy_max, current_max;
+
+               intel_pstate_get_hwp_max(cpu->cpu, &phy_max, &current_max);
+               cpu->pstate.turbo_freq = phy_max * cpu->pstate.scaling;
+       } else {
+               cpu->pstate.turbo_freq = cpu->pstate.turbo_pstate * cpu->pstate.scaling;
+       }
 
        if (pstate_funcs.get_aperf_mperf_shift)
                cpu->aperf_mperf_shift = pstate_funcs.get_aperf_mperf_shift();
@@ -2467,28 +2476,36 @@ static inline bool intel_pstate_has_acpi_ppc(void) { return false; }
 static inline void intel_pstate_request_control_from_smm(void) {}
 #endif /* CONFIG_ACPI */
 
+#define INTEL_PSTATE_HWP_BROADWELL     0x01
+
+#define ICPU_HWP(model, hwp_mode) \
+       { X86_VENDOR_INTEL, 6, model, X86_FEATURE_HWP, hwp_mode }
+
 static const struct x86_cpu_id hwp_support_ids[] __initconst = {
-       { X86_VENDOR_INTEL, 6, X86_MODEL_ANY, X86_FEATURE_HWP },
+       ICPU_HWP(INTEL_FAM6_BROADWELL_X, INTEL_PSTATE_HWP_BROADWELL),
+       ICPU_HWP(INTEL_FAM6_BROADWELL_XEON_D, INTEL_PSTATE_HWP_BROADWELL),
+       ICPU_HWP(X86_MODEL_ANY, 0),
        {}
 };
 
 static int __init intel_pstate_init(void)
 {
+       const struct x86_cpu_id *id;
        int rc;
 
        if (no_load)
                return -ENODEV;
 
-       if (x86_match_cpu(hwp_support_ids)) {
+       id = x86_match_cpu(hwp_support_ids);
+       if (id) {
                copy_cpu_funcs(&core_funcs);
                if (!no_hwp) {
                        hwp_active++;
+                       hwp_mode_bdw = id->driver_data;
                        intel_pstate.attr = hwp_cpufreq_attrs;
                        goto hwp_cpu_matched;
                }
        } else {
-               const struct x86_cpu_id *id;
-
                id = x86_match_cpu(intel_pstate_cpu_ids);
                if (!id)
                        return -ENODEV;
index d049fe4..01bddac 100644 (file)
@@ -42,6 +42,8 @@ enum _msm8996_version {
        NUM_OF_MSM8996_VERSIONS,
 };
 
+struct platform_device *cpufreq_dt_pdev, *kryo_cpufreq_pdev;
+
 static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
 {
        size_t len;
@@ -74,7 +76,6 @@ static enum _msm8996_version __init qcom_cpufreq_kryo_get_msm_id(void)
 static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 {
        struct opp_table *opp_tables[NR_CPUS] = {0};
-       struct platform_device *cpufreq_dt_pdev;
        enum _msm8996_version msm8996_version;
        struct nvmem_cell *speedbin_nvmem;
        struct device_node *np;
@@ -115,6 +116,8 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
 
        speedbin = nvmem_cell_read(speedbin_nvmem, &len);
        nvmem_cell_put(speedbin_nvmem);
+       if (IS_ERR(speedbin))
+               return PTR_ERR(speedbin);
 
        switch (msm8996_version) {
        case MSM8996_V3:
@@ -127,6 +130,7 @@ static int qcom_cpufreq_kryo_probe(struct platform_device *pdev)
                BUG();
                break;
        }
+       kfree(speedbin);
 
        for_each_possible_cpu(cpu) {
                cpu_dev = get_cpu_device(cpu);
@@ -162,8 +166,15 @@ free_opp:
        return ret;
 }
 
+static int qcom_cpufreq_kryo_remove(struct platform_device *pdev)
+{
+       platform_device_unregister(cpufreq_dt_pdev);
+       return 0;
+}
+
 static struct platform_driver qcom_cpufreq_kryo_driver = {
        .probe = qcom_cpufreq_kryo_probe,
+       .remove = qcom_cpufreq_kryo_remove,
        .driver = {
                .name = "qcom-cpufreq-kryo",
        },
@@ -198,8 +209,9 @@ static int __init qcom_cpufreq_kryo_init(void)
        if (unlikely(ret < 0))
                return ret;
 
-       ret = PTR_ERR_OR_ZERO(platform_device_register_simple(
-               "qcom-cpufreq-kryo", -1, NULL, 0));
+       kryo_cpufreq_pdev = platform_device_register_simple(
+               "qcom-cpufreq-kryo", -1, NULL, 0);
+       ret = PTR_ERR_OR_ZERO(kryo_cpufreq_pdev);
        if (0 == ret)
                return 0;
 
@@ -208,5 +220,12 @@ static int __init qcom_cpufreq_kryo_init(void)
 }
 module_init(qcom_cpufreq_kryo_init);
 
+static void __init qcom_cpufreq_kryo_exit(void)
+{
+       platform_device_unregister(kryo_cpufreq_pdev);
+       platform_driver_unregister(&qcom_cpufreq_kryo_driver);
+}
+module_exit(qcom_cpufreq_kryo_exit);
+
 MODULE_DESCRIPTION("Qualcomm Technologies, Inc. Kryo CPUfreq driver");
 MODULE_LICENSE("GPL v2");
index 00c7aab..afebbd8 100644 (file)
@@ -1548,15 +1548,14 @@ skip_copy:
                        tp->urg_data = 0;
 
                if ((avail + offset) >= skb->len) {
-                       if (likely(skb))
-                               chtls_free_skb(sk, skb);
-                       buffers_freed++;
                        if (ULP_SKB_CB(skb)->flags & ULPCB_FLAG_TLS_HDR) {
                                tp->copied_seq += skb->len;
                                hws->rcvpld = skb->hdr_len;
                        } else {
                                tp->copied_seq += hws->rcvpld;
                        }
+                       chtls_free_skb(sk, skb);
+                       buffers_freed++;
                        hws->copied_seq = 0;
                        if (copied >= target &&
                            !skb_peek(&sk->sk_receive_queue))
index 3317d15..6e5284e 100644 (file)
@@ -2158,10 +2158,18 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
        switch (asic_type) {
 #if defined(CONFIG_DRM_AMD_DC)
        case CHIP_BONAIRE:
-       case CHIP_HAWAII:
        case CHIP_KAVERI:
        case CHIP_KABINI:
        case CHIP_MULLINS:
+               /*
+                * We have systems in the wild with these ASICs that require
+                * LVDS and VGA support which is not supported with DC.
+                *
+                * Fallback to the non-DC driver here by default so as not to
+                * cause regressions.
+                */
+               return amdgpu_dc > 0;
+       case CHIP_HAWAII:
        case CHIP_CARRIZO:
        case CHIP_STONEY:
        case CHIP_POLARIS10:
index 5e4e1bd..3526efa 100644 (file)
@@ -762,8 +762,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
        domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
        if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
                adev->vram_pin_size += amdgpu_bo_size(bo);
-               if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-                       adev->invisible_pin_size += amdgpu_bo_size(bo);
+               adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo);
        } else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
                adev->gart_pin_size += amdgpu_bo_size(bo);
        }
@@ -790,25 +789,22 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
        bo->pin_count--;
        if (bo->pin_count)
                return 0;
-       for (i = 0; i < bo->placement.num_placement; i++) {
-               bo->placements[i].lpfn = 0;
-               bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
-       }
-       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-       if (unlikely(r)) {
-               dev_err(adev->dev, "%p validate failed for unpin\n", bo);
-               goto error;
-       }
 
        if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
                adev->vram_pin_size -= amdgpu_bo_size(bo);
-               if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
-                       adev->invisible_pin_size -= amdgpu_bo_size(bo);
+               adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo);
        } else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
                adev->gart_pin_size -= amdgpu_bo_size(bo);
        }
 
-error:
+       for (i = 0; i < bo->placement.num_placement; i++) {
+               bo->placements[i].lpfn = 0;
+               bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
+       }
+       r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+       if (unlikely(r))
+               dev_err(adev->dev, "%p validate failed for unpin\n", bo);
+
        return r;
 }
 
index e969c87..e5da465 100644 (file)
@@ -73,6 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);
 uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
 int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
 
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo);
 uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
 uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);
 
index bcf68f8..3ff08e3 100644 (file)
@@ -130,7 +130,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
        unsigned version_major, version_minor, family_id;
        int i, j, r;
 
-       INIT_DELAYED_WORK(&adev->uvd.inst->idle_work, amdgpu_uvd_idle_work_handler);
+       INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
 
        switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
@@ -314,12 +314,12 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
        void *ptr;
        int i, j;
 
+       cancel_delayed_work_sync(&adev->uvd.idle_work);
+
        for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
                if (adev->uvd.inst[j].vcpu_bo == NULL)
                        continue;
 
-               cancel_delayed_work_sync(&adev->uvd.inst[j].idle_work);
-
                /* only valid for physical mode */
                if (adev->asic_type < CHIP_POLARIS10) {
                        for (i = 0; i < adev->uvd.max_handles; ++i)
@@ -1145,7 +1145,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 {
        struct amdgpu_device *adev =
-               container_of(work, struct amdgpu_device, uvd.inst->idle_work.work);
+               container_of(work, struct amdgpu_device, uvd.idle_work.work);
        unsigned fences = 0, i, j;
 
        for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
@@ -1167,7 +1167,7 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
                                                               AMD_CG_STATE_GATE);
                }
        } else {
-               schedule_delayed_work(&adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
+               schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
        }
 }
 
@@ -1179,7 +1179,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
        if (amdgpu_sriov_vf(adev))
                return;
 
-       set_clocks = !cancel_delayed_work_sync(&adev->uvd.inst->idle_work);
+       set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
        if (set_clocks) {
                if (adev->pm.dpm_enabled) {
                        amdgpu_dpm_enable_uvd(adev, true);
@@ -1196,7 +1196,7 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
 {
        if (!amdgpu_sriov_vf(ring->adev))
-               schedule_delayed_work(&ring->adev->uvd.inst->idle_work, UVD_IDLE_TIMEOUT);
+               schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 }
 
 /**
index b1579fb..8b23a1b 100644 (file)
@@ -44,7 +44,6 @@ struct amdgpu_uvd_inst {
        void                    *saved_bo;
        atomic_t                handles[AMDGPU_MAX_UVD_HANDLES];
        struct drm_file         *filp[AMDGPU_MAX_UVD_HANDLES];
-       struct delayed_work     idle_work;
        struct amdgpu_ring      ring;
        struct amdgpu_ring      ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
        struct amdgpu_irq_src   irq;
@@ -62,6 +61,7 @@ struct amdgpu_uvd {
        bool                    address_64_bit;
        bool                    use_ctx_buf;
        struct amdgpu_uvd_inst          inst[AMDGPU_MAX_UVD_INSTANCES];
+       struct delayed_work     idle_work;
 };
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
index 9aca653..b6333f9 100644 (file)
@@ -96,6 +96,38 @@ static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
                adev->gmc.visible_vram_size : end) - start;
 }
 
+/**
+ * amdgpu_vram_mgr_bo_invisible_size - CPU invisible BO size
+ *
+ * @bo: &amdgpu_bo buffer object (must be in VRAM)
+ *
+ * Returns:
+ * How much of the given &amdgpu_bo buffer object lies in CPU invisible VRAM.
+ */
+u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo)
+{
+       struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+       struct ttm_mem_reg *mem = &bo->tbo.mem;
+       struct drm_mm_node *nodes = mem->mm_node;
+       unsigned pages = mem->num_pages;
+       u64 usage = 0;
+
+       if (adev->gmc.visible_vram_size == adev->gmc.real_vram_size)
+               return 0;
+
+       if (mem->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
+               return amdgpu_bo_size(bo);
+
+       while (nodes && pages) {
+               usage += nodes->size << PAGE_SHIFT;
+               usage -= amdgpu_vram_mgr_vis_size(adev, nodes);
+               pages -= nodes->size;
+               ++nodes;
+       }
+
+       return usage;
+}
+
 /**
  * amdgpu_vram_mgr_new - allocate new ranges
  *
@@ -135,7 +167,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
                num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
        }
 
-       nodes = kcalloc(num_nodes, sizeof(*nodes), GFP_KERNEL);
+       nodes = kvmalloc_array(num_nodes, sizeof(*nodes),
+                              GFP_KERNEL | __GFP_ZERO);
        if (!nodes)
                return -ENOMEM;
 
@@ -190,7 +223,7 @@ error:
                drm_mm_remove_node(&nodes[i]);
        spin_unlock(&mgr->lock);
 
-       kfree(nodes);
+       kvfree(nodes);
        return r == -ENOSPC ? 0 : r;
 }
 
@@ -229,7 +262,7 @@ static void amdgpu_vram_mgr_del(struct ttm_mem_type_manager *man,
        atomic64_sub(usage, &mgr->usage);
        atomic64_sub(vis_usage, &mgr->vis_usage);
 
-       kfree(mem->mm_node);
+       kvfree(mem->mm_node);
        mem->mm_node = NULL;
 }
 
index dbe4b1f..2236487 100644 (file)
@@ -1090,7 +1090,7 @@ static int vega10_disable_se_edc_config(struct pp_hwmgr *hwmgr)
 static int vega10_enable_psm_gc_edc_config(struct pp_hwmgr *hwmgr)
 {
        struct amdgpu_device *adev = hwmgr->adev;
-       int result;
+       int result = 0;
        uint32_t num_se = 0;
        uint32_t count, data;
 
index 73c875d..47e0992 100644 (file)
@@ -839,7 +839,7 @@ static int atmel_hlcdc_plane_init_properties(struct atmel_hlcdc_plane *plane)
                        return ret;
        }
 
-       if (desc->layout.xstride && desc->layout.pstride) {
+       if (desc->layout.xstride[0] && desc->layout.pstride[0]) {
                int ret;
 
                ret = drm_plane_create_rotation_property(&plane->base,
index 7ab3604..250effa 100644 (file)
 
 #define SII8620_BURST_BUF_LEN 288
 #define VAL_RX_HDMI_CTRL2_DEFVAL VAL_RX_HDMI_CTRL2_IDLE_CNT(3)
-#define MHL1_MAX_LCLK 225000
-#define MHL3_MAX_LCLK 600000
+
+#define MHL1_MAX_PCLK 75000
+#define MHL1_MAX_PCLK_PP_MODE 150000
+#define MHL3_MAX_PCLK 200000
+#define MHL3_MAX_PCLK_PP_MODE 300000
 
 enum sii8620_mode {
        CM_DISCONNECTED,
@@ -80,6 +83,9 @@ struct sii8620 {
        u8 devcap[MHL_DCAP_SIZE];
        u8 xdevcap[MHL_XDC_SIZE];
        u8 avif[HDMI_INFOFRAME_SIZE(AVI)];
+       bool feature_complete;
+       bool devcap_read;
+       bool sink_detected;
        struct edid *edid;
        unsigned int gen2_write_burst:1;
        enum sii8620_mt_state mt_state;
@@ -476,7 +482,7 @@ static void sii8620_update_array(u8 *dst, u8 *src, int count)
        }
 }
 
-static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
+static void sii8620_identify_sink(struct sii8620 *ctx)
 {
        static const char * const sink_str[] = {
                [SINK_NONE] = "NONE",
@@ -487,7 +493,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
        char sink_name[20];
        struct device *dev = ctx->dev;
 
-       if (ret < 0)
+       if (!ctx->sink_detected || !ctx->devcap_read)
                return;
 
        sii8620_fetch_edid(ctx);
@@ -496,6 +502,7 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
                sii8620_mhl_disconnected(ctx);
                return;
        }
+       sii8620_set_upstream_edid(ctx);
 
        if (drm_detect_hdmi_monitor(ctx->edid))
                ctx->sink_type = SINK_HDMI;
@@ -508,53 +515,6 @@ static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
                 sink_str[ctx->sink_type], sink_name);
 }
 
-static void sii8620_hsic_init(struct sii8620 *ctx)
-{
-       if (!sii8620_is_mhl3(ctx))
-               return;
-
-       sii8620_write(ctx, REG_FCGC,
-               BIT_FCGC_HSIC_HOSTMODE | BIT_FCGC_HSIC_ENABLE);
-       sii8620_setbits(ctx, REG_HRXCTRL3,
-               BIT_HRXCTRL3_HRX_STAY_RESET | BIT_HRXCTRL3_STATUS_EN, ~0);
-       sii8620_setbits(ctx, REG_TTXNUMB, MSK_TTXNUMB_TTX_NUMBPS, 4);
-       sii8620_setbits(ctx, REG_TRXCTRL, BIT_TRXCTRL_TRX_FROM_SE_COC, ~0);
-       sii8620_setbits(ctx, REG_HTXCTRL, BIT_HTXCTRL_HTX_DRVCONN1, 0);
-       sii8620_setbits(ctx, REG_KEEPER, MSK_KEEPER_MODE, VAL_KEEPER_MODE_HOST);
-       sii8620_write_seq_static(ctx,
-               REG_TDMLLCTL, 0,
-               REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST |
-                       BIT_UTSRST_KEEPER_SRST | BIT_UTSRST_FC_SRST,
-               REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST,
-               REG_HRXINTL, 0xff,
-               REG_HRXINTH, 0xff,
-               REG_TTXINTL, 0xff,
-               REG_TTXINTH, 0xff,
-               REG_TRXINTL, 0xff,
-               REG_TRXINTH, 0xff,
-               REG_HTXINTL, 0xff,
-               REG_HTXINTH, 0xff,
-               REG_FCINTR0, 0xff,
-               REG_FCINTR1, 0xff,
-               REG_FCINTR2, 0xff,
-               REG_FCINTR3, 0xff,
-               REG_FCINTR4, 0xff,
-               REG_FCINTR5, 0xff,
-               REG_FCINTR6, 0xff,
-               REG_FCINTR7, 0xff
-       );
-}
-
-static void sii8620_edid_read(struct sii8620 *ctx, int ret)
-{
-       if (ret < 0)
-               return;
-
-       sii8620_set_upstream_edid(ctx);
-       sii8620_hsic_init(ctx);
-       sii8620_enable_hpd(ctx);
-}
-
 static void sii8620_mr_devcap(struct sii8620 *ctx)
 {
        u8 dcap[MHL_DCAP_SIZE];
@@ -570,6 +530,8 @@ static void sii8620_mr_devcap(struct sii8620 *ctx)
                 dcap[MHL_DCAP_ADOPTER_ID_H], dcap[MHL_DCAP_ADOPTER_ID_L],
                 dcap[MHL_DCAP_DEVICE_ID_H], dcap[MHL_DCAP_DEVICE_ID_L]);
        sii8620_update_array(ctx->devcap, dcap, MHL_DCAP_SIZE);
+       ctx->devcap_read = true;
+       sii8620_identify_sink(ctx);
 }
 
 static void sii8620_mr_xdevcap(struct sii8620 *ctx)
@@ -807,6 +769,7 @@ static void sii8620_burst_rx_all(struct sii8620 *ctx)
 static void sii8620_fetch_edid(struct sii8620 *ctx)
 {
        u8 lm_ddc, ddc_cmd, int3, cbus;
+       unsigned long timeout;
        int fetched, i;
        int edid_len = EDID_LENGTH;
        u8 *edid;
@@ -856,23 +819,31 @@ static void sii8620_fetch_edid(struct sii8620 *ctx)
                        REG_DDC_CMD, ddc_cmd | VAL_DDC_CMD_ENH_DDC_READ_NO_ACK
                );
 
-               do {
-                       int3 = sii8620_readb(ctx, REG_INTR3);
+               int3 = 0;
+               timeout = jiffies + msecs_to_jiffies(200);
+               for (;;) {
                        cbus = sii8620_readb(ctx, REG_CBUS_STATUS);
-
-                       if (int3 & BIT_DDC_CMD_DONE)
-                               break;
-
-                       if (!(cbus & BIT_CBUS_STATUS_CBUS_CONNECTED)) {
+                       if (~cbus & BIT_CBUS_STATUS_CBUS_CONNECTED) {
+                               kfree(edid);
+                               edid = NULL;
+                               goto end;
+                       }
+                       if (int3 & BIT_DDC_CMD_DONE) {
+                               if (sii8620_readb(ctx, REG_DDC_DOUT_CNT)
+                                   >= FETCH_SIZE)
+                                       break;
+                       } else {
+                               int3 = sii8620_readb(ctx, REG_INTR3);
+                       }
+                       if (time_is_before_jiffies(timeout)) {
+                               ctx->error = -ETIMEDOUT;
+                               dev_err(ctx->dev, "timeout during EDID read\n");
                                kfree(edid);
                                edid = NULL;
                                goto end;
                        }
-               } while (1);
-
-               sii8620_readb(ctx, REG_DDC_STATUS);
-               while (sii8620_readb(ctx, REG_DDC_DOUT_CNT) < FETCH_SIZE)
                        usleep_range(10, 20);
+               }
 
                sii8620_read_buf(ctx, REG_DDC_DATA, edid + fetched, FETCH_SIZE);
                if (fetched + FETCH_SIZE == EDID_LENGTH) {
@@ -971,8 +942,17 @@ static int sii8620_hw_on(struct sii8620 *ctx)
        ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
        if (ret)
                return ret;
+
        usleep_range(10000, 20000);
-       return clk_prepare_enable(ctx->clk_xtal);
+       ret = clk_prepare_enable(ctx->clk_xtal);
+       if (ret)
+               return ret;
+
+       msleep(100);
+       gpiod_set_value(ctx->gpio_reset, 0);
+       msleep(100);
+
+       return 0;
 }
 
 static int sii8620_hw_off(struct sii8620 *ctx)
@@ -982,17 +962,6 @@ static int sii8620_hw_off(struct sii8620 *ctx)
        return regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
 }
 
-static void sii8620_hw_reset(struct sii8620 *ctx)
-{
-       usleep_range(10000, 20000);
-       gpiod_set_value(ctx->gpio_reset, 0);
-       usleep_range(5000, 20000);
-       gpiod_set_value(ctx->gpio_reset, 1);
-       usleep_range(10000, 20000);
-       gpiod_set_value(ctx->gpio_reset, 0);
-       msleep(300);
-}
-
 static void sii8620_cbus_reset(struct sii8620 *ctx)
 {
        sii8620_write(ctx, REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST
@@ -1048,20 +1017,11 @@ static void sii8620_stop_video(struct sii8620 *ctx)
 
 static void sii8620_set_format(struct sii8620 *ctx)
 {
-       u8 out_fmt;
-
        if (sii8620_is_mhl3(ctx)) {
                sii8620_setbits(ctx, REG_M3_P0CTRL,
                                BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED,
                                ctx->use_packed_pixel ? ~0 : 0);
        } else {
-               if (ctx->use_packed_pixel)
-                       sii8620_write_seq_static(ctx,
-                               REG_VID_MODE, BIT_VID_MODE_M1080P,
-                               REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1,
-                               REG_MHLTX_CTL6, 0x60
-                       );
-               else
                        sii8620_write_seq_static(ctx,
                                REG_VID_MODE, 0,
                                REG_MHL_TOP_CTL, 1,
@@ -1069,15 +1029,9 @@ static void sii8620_set_format(struct sii8620 *ctx)
                        );
        }
 
-       if (ctx->use_packed_pixel)
-               out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL) |
-                       BIT_TPI_OUTPUT_CSCMODE709;
-       else
-               out_fmt = VAL_TPI_FORMAT(RGB, FULL);
-
        sii8620_write_seq(ctx,
                REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL),
-               REG_TPI_OUTPUT, out_fmt,
+               REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL),
        );
 }
 
@@ -1216,7 +1170,7 @@ static void sii8620_start_video(struct sii8620 *ctx)
                int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3);
                int i;
 
-               for (i = 0; i < ARRAY_SIZE(clk_spec); ++i)
+               for (i = 0; i < ARRAY_SIZE(clk_spec) - 1; ++i)
                        if (clk < clk_spec[i].max_clk)
                                break;
 
@@ -1534,6 +1488,16 @@ static void sii8620_set_mode(struct sii8620 *ctx, enum sii8620_mode mode)
        );
 }
 
+static void sii8620_hpd_unplugged(struct sii8620 *ctx)
+{
+       sii8620_disable_hpd(ctx);
+       ctx->sink_type = SINK_NONE;
+       ctx->sink_detected = false;
+       ctx->feature_complete = false;
+       kfree(ctx->edid);
+       ctx->edid = NULL;
+}
+
 static void sii8620_disconnect(struct sii8620 *ctx)
 {
        sii8620_disable_gen2_write_burst(ctx);
@@ -1561,7 +1525,7 @@ static void sii8620_disconnect(struct sii8620 *ctx)
                REG_MHL_DP_CTL6, 0x2A,
                REG_MHL_DP_CTL7, 0x03
        );
-       sii8620_disable_hpd(ctx);
+       sii8620_hpd_unplugged(ctx);
        sii8620_write_seq_static(ctx,
                REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE,
                REG_MHL_COC_CTL1, 0x07,
@@ -1609,10 +1573,8 @@ static void sii8620_disconnect(struct sii8620 *ctx)
        memset(ctx->xstat, 0, sizeof(ctx->xstat));
        memset(ctx->devcap, 0, sizeof(ctx->devcap));
        memset(ctx->xdevcap, 0, sizeof(ctx->xdevcap));
+       ctx->devcap_read = false;
        ctx->cbus_status = 0;
-       ctx->sink_type = SINK_NONE;
-       kfree(ctx->edid);
-       ctx->edid = NULL;
        sii8620_mt_cleanup(ctx);
 }
 
@@ -1703,9 +1665,6 @@ static void sii8620_status_changed_path(struct sii8620 *ctx)
                sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
                                      MHL_DST_LM_CLK_MODE_NORMAL
                                      | MHL_DST_LM_PATH_ENABLED);
-               if (!sii8620_is_mhl3(ctx))
-                       sii8620_mt_read_devcap(ctx, false);
-               sii8620_mt_set_cont(ctx, sii8620_sink_detected);
        } else {
                sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
                                      MHL_DST_LM_CLK_MODE_NORMAL);
@@ -1722,9 +1681,14 @@ static void sii8620_msc_mr_write_stat(struct sii8620 *ctx)
        sii8620_update_array(ctx->stat, st, MHL_DST_SIZE);
        sii8620_update_array(ctx->xstat, xst, MHL_XDS_SIZE);
 
-       if (ctx->stat[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY)
+       if (ctx->stat[MHL_DST_CONNECTED_RDY] & st[MHL_DST_CONNECTED_RDY] &
+           MHL_DST_CONN_DCAP_RDY) {
                sii8620_status_dcap_ready(ctx);
 
+               if (!sii8620_is_mhl3(ctx))
+                       sii8620_mt_read_devcap(ctx, false);
+       }
+
        if (st[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED)
                sii8620_status_changed_path(ctx);
 }
@@ -1808,8 +1772,11 @@ static void sii8620_msc_mr_set_int(struct sii8620 *ctx)
        }
        if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_REQ)
                sii8620_send_features(ctx);
-       if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE)
-               sii8620_edid_read(ctx, 0);
+       if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE) {
+               ctx->feature_complete = true;
+               if (ctx->edid)
+                       sii8620_enable_hpd(ctx);
+       }
 }
 
 static struct sii8620_mt_msg *sii8620_msc_msg_first(struct sii8620 *ctx)
@@ -1884,6 +1851,15 @@ static void sii8620_irq_msc(struct sii8620 *ctx)
        if (stat & BIT_CBUS_MSC_MR_WRITE_STAT)
                sii8620_msc_mr_write_stat(ctx);
 
+       if (stat & BIT_CBUS_HPD_CHG) {
+               if (ctx->cbus_status & BIT_CBUS_STATUS_CBUS_HPD) {
+                       ctx->sink_detected = true;
+                       sii8620_identify_sink(ctx);
+               } else {
+                       sii8620_hpd_unplugged(ctx);
+               }
+       }
+
        if (stat & BIT_CBUS_MSC_MR_SET_INT)
                sii8620_msc_mr_set_int(ctx);
 
@@ -1931,14 +1907,6 @@ static void sii8620_irq_edid(struct sii8620 *ctx)
                ctx->mt_state = MT_STATE_DONE;
 }
 
-static void sii8620_scdt_high(struct sii8620 *ctx)
-{
-       sii8620_write_seq_static(ctx,
-               REG_INTR8_MASK, BIT_CEA_NEW_AVI | BIT_CEA_NEW_VSI,
-               REG_TPI_SC, BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI,
-       );
-}
-
 static void sii8620_irq_scdt(struct sii8620 *ctx)
 {
        u8 stat = sii8620_readb(ctx, REG_INTR5);
@@ -1946,53 +1914,13 @@ static void sii8620_irq_scdt(struct sii8620 *ctx)
        if (stat & BIT_INTR_SCDT_CHANGE) {
                u8 cstat = sii8620_readb(ctx, REG_TMDS_CSTAT_P3);
 
-               if (cstat & BIT_TMDS_CSTAT_P3_SCDT) {
-                       if (ctx->sink_type == SINK_HDMI)
-                               /* enable infoframe interrupt */
-                               sii8620_scdt_high(ctx);
-                       else
-                               sii8620_start_video(ctx);
-               }
+               if (cstat & BIT_TMDS_CSTAT_P3_SCDT)
+                       sii8620_start_video(ctx);
        }
 
        sii8620_write(ctx, REG_INTR5, stat);
 }
 
-static void sii8620_new_vsi(struct sii8620 *ctx)
-{
-       u8 vsif[11];
-
-       sii8620_write(ctx, REG_RX_HDMI_CTRL2,
-                     VAL_RX_HDMI_CTRL2_DEFVAL |
-                     BIT_RX_HDMI_CTRL2_VSI_MON_SEL_VSI);
-       sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, vsif,
-                        ARRAY_SIZE(vsif));
-}
-
-static void sii8620_new_avi(struct sii8620 *ctx)
-{
-       sii8620_write(ctx, REG_RX_HDMI_CTRL2, VAL_RX_HDMI_CTRL2_DEFVAL);
-       sii8620_read_buf(ctx, REG_RX_HDMI_MON_PKT_HEADER1, ctx->avif,
-                        ARRAY_SIZE(ctx->avif));
-}
-
-static void sii8620_irq_infr(struct sii8620 *ctx)
-{
-       u8 stat = sii8620_readb(ctx, REG_INTR8)
-               & (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI);
-
-       sii8620_write(ctx, REG_INTR8, stat);
-
-       if (stat & BIT_CEA_NEW_VSI)
-               sii8620_new_vsi(ctx);
-
-       if (stat & BIT_CEA_NEW_AVI)
-               sii8620_new_avi(ctx);
-
-       if (stat & (BIT_CEA_NEW_VSI | BIT_CEA_NEW_AVI))
-               sii8620_start_video(ctx);
-}
-
 static void sii8620_got_xdevcap(struct sii8620 *ctx, int ret)
 {
        if (ret < 0)
@@ -2043,11 +1971,11 @@ static void sii8620_irq_ddc(struct sii8620 *ctx)
 
        if (stat & BIT_DDC_CMD_DONE) {
                sii8620_write(ctx, REG_INTR3_MASK, 0);
-               if (sii8620_is_mhl3(ctx))
+               if (sii8620_is_mhl3(ctx) && !ctx->feature_complete)
                        sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE),
                                           MHL_INT_RC_FEAT_REQ);
                else
-                       sii8620_edid_read(ctx, 0);
+                       sii8620_enable_hpd(ctx);
        }
        sii8620_write(ctx, REG_INTR3, stat);
 }
@@ -2074,7 +2002,6 @@ static irqreturn_t sii8620_irq_thread(int irq, void *data)
                { BIT_FAST_INTR_STAT_EDID, sii8620_irq_edid },
                { BIT_FAST_INTR_STAT_DDC, sii8620_irq_ddc },
                { BIT_FAST_INTR_STAT_SCDT, sii8620_irq_scdt },
-               { BIT_FAST_INTR_STAT_INFR, sii8620_irq_infr },
        };
        struct sii8620 *ctx = data;
        u8 stats[LEN_FAST_INTR_STAT];
@@ -2112,7 +2039,6 @@ static void sii8620_cable_in(struct sii8620 *ctx)
                dev_err(dev, "Error powering on, %d.\n", ret);
                return;
        }
-       sii8620_hw_reset(ctx);
 
        sii8620_read_buf(ctx, REG_VND_IDL, ver, ARRAY_SIZE(ver));
        ret = sii8620_clear_error(ctx);
@@ -2268,17 +2194,43 @@ static void sii8620_detach(struct drm_bridge *bridge)
        rc_unregister_device(ctx->rc_dev);
 }
 
+static int sii8620_is_packing_required(struct sii8620 *ctx,
+                                      const struct drm_display_mode *mode)
+{
+       int max_pclk, max_pclk_pp_mode;
+
+       if (sii8620_is_mhl3(ctx)) {
+               max_pclk = MHL3_MAX_PCLK;
+               max_pclk_pp_mode = MHL3_MAX_PCLK_PP_MODE;
+       } else {
+               max_pclk = MHL1_MAX_PCLK;
+               max_pclk_pp_mode = MHL1_MAX_PCLK_PP_MODE;
+       }
+
+       if (mode->clock < max_pclk)
+               return 0;
+       else if (mode->clock < max_pclk_pp_mode)
+               return 1;
+       else
+               return -1;
+}
+
 static enum drm_mode_status sii8620_mode_valid(struct drm_bridge *bridge,
                                         const struct drm_display_mode *mode)
 {
        struct sii8620 *ctx = bridge_to_sii8620(bridge);
+       int pack_required = sii8620_is_packing_required(ctx, mode);
        bool can_pack = ctx->devcap[MHL_DCAP_VID_LINK_MODE] &
                        MHL_DCAP_VID_LINK_PPIXEL;
-       unsigned int max_pclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK :
-                                                      MHL1_MAX_LCLK;
-       max_pclk /= can_pack ? 2 : 3;
 
-       return (mode->clock > max_pclk) ? MODE_CLOCK_HIGH : MODE_OK;
+       switch (pack_required) {
+       case 0:
+               return MODE_OK;
+       case 1:
+               return (can_pack) ? MODE_OK : MODE_CLOCK_HIGH;
+       default:
+               return MODE_CLOCK_HIGH;
+       }
 }
 
 static bool sii8620_mode_fixup(struct drm_bridge *bridge,
@@ -2286,43 +2238,16 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge,
                               struct drm_display_mode *adjusted_mode)
 {
        struct sii8620 *ctx = bridge_to_sii8620(bridge);
-       int max_lclk;
-       bool ret = true;
 
        mutex_lock(&ctx->lock);
 
-       max_lclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK : MHL1_MAX_LCLK;
-       if (max_lclk > 3 * adjusted_mode->clock) {
-               ctx->use_packed_pixel = 0;
-               goto end;
-       }
-       if ((ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL) &&
-           max_lclk > 2 * adjusted_mode->clock) {
-               ctx->use_packed_pixel = 1;
-               goto end;
-       }
-       ret = false;
-end:
-       if (ret) {
-               u8 vic = drm_match_cea_mode(adjusted_mode);
-
-               if (!vic) {
-                       union hdmi_infoframe frm;
-                       u8 mhl_vic[] = { 0, 95, 94, 93, 98 };
-
-                       /* FIXME: We need the connector here */
-                       drm_hdmi_vendor_infoframe_from_display_mode(
-                               &frm.vendor.hdmi, NULL, adjusted_mode);
-                       vic = frm.vendor.hdmi.vic;
-                       if (vic >= ARRAY_SIZE(mhl_vic))
-                               vic = 0;
-                       vic = mhl_vic[vic];
-               }
-               ctx->video_code = vic;
-               ctx->pixel_clock = adjusted_mode->clock;
-       }
+       ctx->use_packed_pixel = sii8620_is_packing_required(ctx, adjusted_mode);
+       ctx->video_code = drm_match_cea_mode(adjusted_mode);
+       ctx->pixel_clock = adjusted_mode->clock;
+
        mutex_unlock(&ctx->lock);
-       return ret;
+
+       return true;
 }
 
 static const struct drm_bridge_funcs sii8620_bridge_funcs = {
index b553a6f..7af748e 100644 (file)
@@ -369,13 +369,6 @@ EXPORT_SYMBOL(drm_dev_exit);
  */
 void drm_dev_unplug(struct drm_device *dev)
 {
-       drm_dev_unregister(dev);
-
-       mutex_lock(&drm_global_mutex);
-       if (dev->open_count == 0)
-               drm_dev_put(dev);
-       mutex_unlock(&drm_global_mutex);
-
        /*
         * After synchronizing any critical read section is guaranteed to see
         * the new value of ->unplugged, and any critical section which might
@@ -384,6 +377,13 @@ void drm_dev_unplug(struct drm_device *dev)
         */
        dev->unplugged = true;
        synchronize_srcu(&drm_unplug_srcu);
+
+       drm_dev_unregister(dev);
+
+       mutex_lock(&drm_global_mutex);
+       if (dev->open_count == 0)
+               drm_dev_put(dev);
+       mutex_unlock(&drm_global_mutex);
 }
 EXPORT_SYMBOL(drm_dev_unplug);
 
index 34c125e..7014a96 100644 (file)
@@ -340,14 +340,21 @@ struct drm_i915_file_private {
 
        unsigned int bsd_engine;
 
-/* Client can have a maximum of 3 contexts banned before
- * it is denied of creating new contexts. As one context
- * ban needs 4 consecutive hangs, and more if there is
- * progress in between, this is a last resort stop gap measure
- * to limit the badly behaving clients access to gpu.
+/*
+ * Every context ban increments per client ban score. Also
+ * hangs in short succession increments ban score. If ban threshold
+ * is reached, client is considered banned and submitting more work
+ * will fail. This is a stop gap measure to limit the badly behaving
+ * clients access to gpu. Note that unbannable contexts never increment
+ * the client ban score.
  */
-#define I915_MAX_CLIENT_CONTEXT_BANS 3
-       atomic_t context_bans;
+#define I915_CLIENT_SCORE_HANG_FAST    1
+#define   I915_CLIENT_FAST_HANG_JIFFIES (60 * HZ)
+#define I915_CLIENT_SCORE_CONTEXT_BAN   3
+#define I915_CLIENT_SCORE_BANNED       9
+       /** ban_score: Accumulated score of all ctx bans and fast hangs. */
+       atomic_t ban_score;
+       unsigned long hang_timestamp;
 };
 
 /* Interface history:
index 3704f4c..d44ad7b 100644 (file)
@@ -2933,32 +2933,54 @@ i915_gem_object_pwrite_gtt(struct drm_i915_gem_object *obj,
        return 0;
 }
 
+static void i915_gem_client_mark_guilty(struct drm_i915_file_private *file_priv,
+                                       const struct i915_gem_context *ctx)
+{
+       unsigned int score;
+       unsigned long prev_hang;
+
+       if (i915_gem_context_is_banned(ctx))
+               score = I915_CLIENT_SCORE_CONTEXT_BAN;
+       else
+               score = 0;
+
+       prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
+       if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
+               score += I915_CLIENT_SCORE_HANG_FAST;
+
+       if (score) {
+               atomic_add(score, &file_priv->ban_score);
+
+               DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
+                                ctx->name, score,
+                                atomic_read(&file_priv->ban_score));
+       }
+}
+
 static void i915_gem_context_mark_guilty(struct i915_gem_context *ctx)
 {
-       bool banned;
+       unsigned int score;
+       bool banned, bannable;
 
        atomic_inc(&ctx->guilty_count);
 
-       banned = false;
-       if (i915_gem_context_is_bannable(ctx)) {
-               unsigned int score;
+       bannable = i915_gem_context_is_bannable(ctx);
+       score = atomic_add_return(CONTEXT_SCORE_GUILTY, &ctx->ban_score);
+       banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
 
-               score = atomic_add_return(CONTEXT_SCORE_GUILTY,
-                                         &ctx->ban_score);
-               banned = score >= CONTEXT_SCORE_BAN_THRESHOLD;
+       DRM_DEBUG_DRIVER("context %s: guilty %d, score %u, ban %s\n",
+                        ctx->name, atomic_read(&ctx->guilty_count),
+                        score, yesno(banned && bannable));
 
-               DRM_DEBUG_DRIVER("context %s marked guilty (score %d) banned? %s\n",
-                                ctx->name, score, yesno(banned));
-       }
-       if (!banned)
+       /* Cool contexts don't accumulate client ban score */
+       if (!bannable)
                return;
 
-       i915_gem_context_set_banned(ctx);
-       if (!IS_ERR_OR_NULL(ctx->file_priv)) {
-               atomic_inc(&ctx->file_priv->context_bans);
-               DRM_DEBUG_DRIVER("client %s has had %d context banned\n",
-                                ctx->name, atomic_read(&ctx->file_priv->context_bans));
-       }
+       if (banned)
+               i915_gem_context_set_banned(ctx);
+
+       if (!IS_ERR_OR_NULL(ctx->file_priv))
+               i915_gem_client_mark_guilty(ctx->file_priv, ctx);
 }
 
 static void i915_gem_context_mark_innocent(struct i915_gem_context *ctx)
@@ -5736,6 +5758,7 @@ int i915_gem_open(struct drm_i915_private *i915, struct drm_file *file)
        INIT_LIST_HEAD(&file_priv->mm.request_list);
 
        file_priv->bsd_engine = -1;
+       file_priv->hang_timestamp = jiffies;
 
        ret = i915_gem_context_open(i915, file);
        if (ret)
index 33f8a4b..060335d 100644 (file)
@@ -652,7 +652,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
 
 static bool client_is_banned(struct drm_i915_file_private *file_priv)
 {
-       return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
+       return atomic_read(&file_priv->ban_score) >= I915_CLIENT_SCORE_BANNED;
 }
 
 int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
index f627a8c..22df17c 100644 (file)
@@ -489,7 +489,9 @@ eb_validate_vma(struct i915_execbuffer *eb,
 }
 
 static int
-eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
+eb_add_vma(struct i915_execbuffer *eb,
+          unsigned int i, unsigned batch_idx,
+          struct i915_vma *vma)
 {
        struct drm_i915_gem_exec_object2 *entry = &eb->exec[i];
        int err;
@@ -522,6 +524,24 @@ eb_add_vma(struct i915_execbuffer *eb, unsigned int i, struct i915_vma *vma)
        eb->flags[i] = entry->flags;
        vma->exec_flags = &eb->flags[i];
 
+       /*
+        * SNA is doing fancy tricks with compressing batch buffers, which leads
+        * to negative relocation deltas. Usually that works out ok since the
+        * relocate address is still positive, except when the batch is placed
+        * very low in the GTT. Ensure this doesn't happen.
+        *
+        * Note that actual hangs have only been observed on gen7, but for
+        * paranoia do it everywhere.
+        */
+       if (i == batch_idx) {
+               if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
+                       eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
+               if (eb->reloc_cache.has_fence)
+                       eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
+
+               eb->batch = vma;
+       }
+
        err = 0;
        if (eb_pin_vma(eb, entry, vma)) {
                if (entry->offset != vma->node.start) {
@@ -716,7 +736,7 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
 {
        struct radix_tree_root *handles_vma = &eb->ctx->handles_vma;
        struct drm_i915_gem_object *obj;
-       unsigned int i;
+       unsigned int i, batch;
        int err;
 
        if (unlikely(i915_gem_context_is_closed(eb->ctx)))
@@ -728,6 +748,8 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
        INIT_LIST_HEAD(&eb->relocs);
        INIT_LIST_HEAD(&eb->unbound);
 
+       batch = eb_batch_index(eb);
+
        for (i = 0; i < eb->buffer_count; i++) {
                u32 handle = eb->exec[i].handle;
                struct i915_lut_handle *lut;
@@ -770,33 +792,16 @@ static int eb_lookup_vmas(struct i915_execbuffer *eb)
                lut->handle = handle;
 
 add_vma:
-               err = eb_add_vma(eb, i, vma);
+               err = eb_add_vma(eb, i, batch, vma);
                if (unlikely(err))
                        goto err_vma;
 
                GEM_BUG_ON(vma != eb->vma[i]);
                GEM_BUG_ON(vma->exec_flags != &eb->flags[i]);
+               GEM_BUG_ON(drm_mm_node_allocated(&vma->node) &&
+                          eb_vma_misplaced(&eb->exec[i], vma, eb->flags[i]));
        }
 
-       /* take note of the batch buffer before we might reorder the lists */
-       i = eb_batch_index(eb);
-       eb->batch = eb->vma[i];
-       GEM_BUG_ON(eb->batch->exec_flags != &eb->flags[i]);
-
-       /*
-        * SNA is doing fancy tricks with compressing batch buffers, which leads
-        * to negative relocation deltas. Usually that works out ok since the
-        * relocate address is still positive, except when the batch is placed
-        * very low in the GTT. Ensure this doesn't happen.
-        *
-        * Note that actual hangs have only been observed on gen7, but for
-        * paranoia do it everywhere.
-        */
-       if (!(eb->flags[i] & EXEC_OBJECT_PINNED))
-               eb->flags[i] |= __EXEC_OBJECT_NEEDS_BIAS;
-       if (eb->reloc_cache.has_fence)
-               eb->flags[i] |= EXEC_OBJECT_NEEDS_FENCE;
-
        eb->args->flags |= __EXEC_VALIDATED;
        return eb_reserve(eb);
 
index f9bc3aa..4a02747 100644 (file)
@@ -1893,9 +1893,17 @@ static void i9xx_pipestat_irq_ack(struct drm_i915_private *dev_priv,
 
                /*
                 * Clear the PIPE*STAT regs before the IIR
+                *
+                * Toggle the enable bits to make sure we get an
+                * edge in the ISR pipe event bit if we don't clear
+                * all the enabled status bits. Otherwise the edge
+                * triggered IIR on i965/g4x wouldn't notice that
+                * an interrupt is still pending.
                 */
-               if (pipe_stats[pipe])
-                       I915_WRITE(reg, enable_mask | pipe_stats[pipe]);
+               if (pipe_stats[pipe]) {
+                       I915_WRITE(reg, pipe_stats[pipe]);
+                       I915_WRITE(reg, enable_mask);
+               }
        }
        spin_unlock(&dev_priv->irq_lock);
 }
index f11bb21..7720569 100644 (file)
@@ -2425,12 +2425,17 @@ enum i915_power_well_id {
 #define _3D_CHICKEN    _MMIO(0x2084)
 #define  _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB     (1 << 10)
 #define _3D_CHICKEN2   _MMIO(0x208c)
+
+#define FF_SLICE_CHICKEN       _MMIO(0x2088)
+#define  FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX      (1 << 1)
+
 /* Disables pipelining of read flushes past the SF-WIZ interface.
  * Required on all Ironlake steppings according to the B-Spec, but the
  * particular danger of not doing so is not specified.
  */
 # define _3D_CHICKEN2_WM_READ_PIPELINED                        (1 << 14)
 #define _3D_CHICKEN3   _MMIO(0x2090)
+#define  _3D_CHICKEN_SF_PROVOKING_VERTEX_FIX           (1 << 12)
 #define  _3D_CHICKEN_SF_DISABLE_OBJEND_CULL            (1 << 10)
 #define  _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE       (1 << 5)
 #define  _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL         (1 << 5)
index de0e223..072b326 100644 (file)
@@ -304,6 +304,9 @@ intel_crt_mode_valid(struct drm_connector *connector,
        int max_dotclk = dev_priv->max_dotclk_freq;
        int max_clock;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        if (mode->clock < 25000)
                return MODE_CLOCK_LOW;
 
@@ -337,6 +340,12 @@ static bool intel_crt_compute_config(struct intel_encoder *encoder,
                                     struct intel_crtc_state *pipe_config,
                                     struct drm_connector_state *conn_state)
 {
+       struct drm_display_mode *adjusted_mode =
+               &pipe_config->base.adjusted_mode;
+
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        return true;
 }
 
@@ -344,6 +353,12 @@ static bool pch_crt_compute_config(struct intel_encoder *encoder,
                                   struct intel_crtc_state *pipe_config,
                                   struct drm_connector_state *conn_state)
 {
+       struct drm_display_mode *adjusted_mode =
+               &pipe_config->base.adjusted_mode;
+
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        pipe_config->has_pch_encoder = true;
 
        return true;
@@ -354,6 +369,11 @@ static bool hsw_crt_compute_config(struct intel_encoder *encoder,
                                   struct drm_connector_state *conn_state)
 {
        struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+       struct drm_display_mode *adjusted_mode =
+               &pipe_config->base.adjusted_mode;
+
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
 
        pipe_config->has_pch_encoder = true;
 
index dee3a8e..2cc6faa 100644 (file)
@@ -14469,12 +14469,22 @@ static enum drm_mode_status
 intel_mode_valid(struct drm_device *dev,
                 const struct drm_display_mode *mode)
 {
+       /*
+        * Can't reject DBLSCAN here because Xorg ddxen can add piles
+        * of DBLSCAN modes to the output's mode list when they detect
+        * the scaling mode property on the connector. And they don't
+        * ask the kernel to validate those modes in any way until
+        * modeset time at which point the client gets a protocol error.
+        * So in order to not upset those clients we silently ignore the
+        * DBLSCAN flag on such connectors. For other connectors we will
+        * reject modes with the DBLSCAN flag in encoder->compute_config().
+        * And we always reject DBLSCAN modes in connector->mode_valid()
+        * as we never want such modes on the connector's mode list.
+        */
+
        if (mode->vscan > 1)
                return MODE_NO_VSCAN;
 
-       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
-               return MODE_NO_DBLESCAN;
-
        if (mode->flags & DRM_MODE_FLAG_HSKEW)
                return MODE_H_ILLEGAL;
 
index 8320f0e..16faea3 100644 (file)
@@ -420,6 +420,9 @@ intel_dp_mode_valid(struct drm_connector *connector,
        int max_rate, mode_rate, max_lanes, max_link_clock;
        int max_dotclk;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        max_dotclk = intel_dp_downstream_max_dotclock(intel_dp);
 
        if (intel_dp_is_edp(intel_dp) && fixed_mode) {
@@ -1862,7 +1865,10 @@ intel_dp_compute_config(struct intel_encoder *encoder,
                                                conn_state->scaling_mode);
        }
 
-       if ((IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) &&
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
+       if (HAS_GMCH_DISPLAY(dev_priv) &&
            adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE)
                return false;
 
@@ -2782,16 +2788,6 @@ static void intel_disable_dp(struct intel_encoder *encoder,
 static void g4x_disable_dp(struct intel_encoder *encoder,
                           const struct intel_crtc_state *old_crtc_state,
                           const struct drm_connector_state *old_conn_state)
-{
-       intel_disable_dp(encoder, old_crtc_state, old_conn_state);
-
-       /* disable the port before the pipe on g4x */
-       intel_dp_link_down(encoder, old_crtc_state);
-}
-
-static void ilk_disable_dp(struct intel_encoder *encoder,
-                          const struct intel_crtc_state *old_crtc_state,
-                          const struct drm_connector_state *old_conn_state)
 {
        intel_disable_dp(encoder, old_crtc_state, old_conn_state);
 }
@@ -2807,13 +2803,19 @@ static void vlv_disable_dp(struct intel_encoder *encoder,
        intel_disable_dp(encoder, old_crtc_state, old_conn_state);
 }
 
-static void ilk_post_disable_dp(struct intel_encoder *encoder,
+static void g4x_post_disable_dp(struct intel_encoder *encoder,
                                const struct intel_crtc_state *old_crtc_state,
                                const struct drm_connector_state *old_conn_state)
 {
        struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
        enum port port = encoder->port;
 
+       /*
+        * Bspec does not list a specific disable sequence for g4x DP.
+        * Follow the ilk+ sequence (disable pipe before the port) for
+        * g4x DP as it does not suffer from underruns like the normal
+        * g4x modeset sequence (disable pipe after the port).
+        */
        intel_dp_link_down(encoder, old_crtc_state);
 
        /* Only ilk+ has port A */
@@ -6337,7 +6339,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
        drm_connector_init(dev, connector, &intel_dp_connector_funcs, type);
        drm_connector_helper_add(connector, &intel_dp_connector_helper_funcs);
 
-       if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
+       if (!HAS_GMCH_DISPLAY(dev_priv))
                connector->interlace_allowed = true;
        connector->doublescan_allowed = 0;
 
@@ -6436,15 +6438,11 @@ bool intel_dp_init(struct drm_i915_private *dev_priv,
                intel_encoder->enable = vlv_enable_dp;
                intel_encoder->disable = vlv_disable_dp;
                intel_encoder->post_disable = vlv_post_disable_dp;
-       } else if (INTEL_GEN(dev_priv) >= 5) {
-               intel_encoder->pre_enable = g4x_pre_enable_dp;
-               intel_encoder->enable = g4x_enable_dp;
-               intel_encoder->disable = ilk_disable_dp;
-               intel_encoder->post_disable = ilk_post_disable_dp;
        } else {
                intel_encoder->pre_enable = g4x_pre_enable_dp;
                intel_encoder->enable = g4x_enable_dp;
                intel_encoder->disable = g4x_disable_dp;
+               intel_encoder->post_disable = g4x_post_disable_dp;
        }
 
        intel_dig_port->dp.output_reg = output_reg;
index 9e6956c..5890500 100644 (file)
@@ -48,6 +48,9 @@ static bool intel_dp_mst_compute_config(struct intel_encoder *encoder,
        bool reduce_m_n = drm_dp_has_quirk(&intel_dp->desc,
                                           DP_DPCD_QUIRK_LIMITED_M_N);
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        pipe_config->has_pch_encoder = false;
        bpp = 24;
        if (intel_dp->compliance.test_data.bpc) {
@@ -366,6 +369,9 @@ intel_dp_mst_mode_valid(struct drm_connector *connector,
        if (!intel_dp)
                return MODE_ERROR;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        max_link_clock = intel_dp_max_link_rate(intel_dp);
        max_lanes = intel_dp_max_lane_count(intel_dp);
 
index cf39ca9..f349b39 100644 (file)
@@ -326,6 +326,9 @@ static bool intel_dsi_compute_config(struct intel_encoder *encoder,
                                                conn_state->scaling_mode);
        }
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        /* DSI uses short packets for sync events, so clear mode flags for DSI */
        adjusted_mode->flags = 0;
 
@@ -1266,6 +1269,9 @@ intel_dsi_mode_valid(struct drm_connector *connector,
 
        DRM_DEBUG_KMS("\n");
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        if (fixed_mode) {
                if (mode->hdisplay > fixed_mode->hdisplay)
                        return MODE_PANEL;
index a70d767..61d908e 100644 (file)
@@ -219,6 +219,9 @@ intel_dvo_mode_valid(struct drm_connector *connector,
        int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
        int target_clock = mode->clock;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        /* XXX: Validate clock range */
 
        if (fixed_mode) {
@@ -254,6 +257,9 @@ static bool intel_dvo_compute_config(struct intel_encoder *encoder,
        if (fixed_mode)
                intel_fixed_panel_mode(fixed_mode, adjusted_mode);
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        return true;
 }
 
index ee929f3..d8cb53e 100644 (file)
@@ -1557,6 +1557,9 @@ intel_hdmi_mode_valid(struct drm_connector *connector,
        bool force_dvi =
                READ_ONCE(to_intel_digital_connector_state(connector->state)->force_audio) == HDMI_AUDIO_OFF_DVI;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        clock = mode->clock;
 
        if ((mode->flags & DRM_MODE_FLAG_3D_MASK) == DRM_MODE_FLAG_3D_FRAME_PACKING)
@@ -1677,6 +1680,9 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
        int desired_bpp;
        bool force_dvi = intel_conn_state->force_audio == HDMI_AUDIO_OFF_DVI;
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        pipe_config->has_hdmi_sink = !force_dvi && intel_hdmi->has_hdmi_sink;
 
        if (pipe_config->has_hdmi_sink)
index 15434ca..7c4c8fb 100644 (file)
@@ -1545,11 +1545,21 @@ static u32 *gen9_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
        /* WaFlushCoherentL3CacheLinesAtContextSwitch:skl,bxt,glk */
        batch = gen8_emit_flush_coherentl3_wa(engine, batch);
 
+       *batch++ = MI_LOAD_REGISTER_IMM(3);
+
        /* WaDisableGatherAtSetShaderCommonSlice:skl,bxt,kbl,glk */
-       *batch++ = MI_LOAD_REGISTER_IMM(1);
        *batch++ = i915_mmio_reg_offset(COMMON_SLICE_CHICKEN2);
        *batch++ = _MASKED_BIT_DISABLE(
                        GEN9_DISABLE_GATHER_AT_SET_SHADER_COMMON_SLICE);
+
+       /* BSpec: 11391 */
+       *batch++ = i915_mmio_reg_offset(FF_SLICE_CHICKEN);
+       *batch++ = _MASKED_BIT_ENABLE(FF_SLICE_CHICKEN_CL_PROVOKING_VERTEX_FIX);
+
+       /* BSpec: 11299 */
+       *batch++ = i915_mmio_reg_offset(_3D_CHICKEN3);
+       *batch++ = _MASKED_BIT_ENABLE(_3D_CHICKEN_SF_PROVOKING_VERTEX_FIX);
+
        *batch++ = MI_NOOP;
 
        /* WaClearSlmSpaceAtContextSwitch:kbl */
@@ -2641,10 +2651,8 @@ static int execlists_context_deferred_alloc(struct i915_gem_context *ctx,
        context_size += LRC_HEADER_PAGES * PAGE_SIZE;
 
        ctx_obj = i915_gem_object_create(ctx->i915, context_size);
-       if (IS_ERR(ctx_obj)) {
-               ret = PTR_ERR(ctx_obj);
-               goto error_deref_obj;
-       }
+       if (IS_ERR(ctx_obj))
+               return PTR_ERR(ctx_obj);
 
        vma = i915_vma_instance(ctx_obj, &ctx->i915->ggtt.base, NULL);
        if (IS_ERR(vma)) {
index d278f24..48f618d 100644 (file)
@@ -380,6 +380,8 @@ intel_lvds_mode_valid(struct drm_connector *connector,
        struct drm_display_mode *fixed_mode = intel_connector->panel.fixed_mode;
        int max_pixclk = to_i915(connector->dev)->max_dotclk_freq;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
        if (mode->hdisplay > fixed_mode->hdisplay)
                return MODE_PANEL;
        if (mode->vdisplay > fixed_mode->vdisplay)
@@ -429,6 +431,9 @@ static bool intel_lvds_compute_config(struct intel_encoder *intel_encoder,
        intel_fixed_panel_mode(intel_connector->panel.fixed_mode,
                               adjusted_mode);
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        if (HAS_PCH_SPLIT(dev_priv)) {
                pipe_config->has_pch_encoder = true;
 
index 2500502..26975df 100644 (file)
@@ -1160,6 +1160,9 @@ static bool intel_sdvo_compute_config(struct intel_encoder *encoder,
                                                           adjusted_mode);
        }
 
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
        /*
         * Make the CRTC code factor in the SDVO pixel multiplier.  The
         * SDVO device will factor out the multiplier during mode_set.
@@ -1621,6 +1624,9 @@ intel_sdvo_mode_valid(struct drm_connector *connector,
        struct intel_sdvo *intel_sdvo = intel_attached_sdvo(connector);
        int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        if (intel_sdvo->pixel_clock_min > mode->clock)
                return MODE_CLOCK_LOW;
 
index 885fc38..b55b5c1 100644 (file)
@@ -850,6 +850,9 @@ intel_tv_mode_valid(struct drm_connector *connector,
        const struct tv_mode *tv_mode = intel_tv_mode_find(connector->state);
        int max_dotclk = to_i915(connector->dev)->max_dotclk_freq;
 
+       if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return MODE_NO_DBLESCAN;
+
        if (mode->clock > max_dotclk)
                return MODE_CLOCK_HIGH;
 
@@ -877,16 +880,21 @@ intel_tv_compute_config(struct intel_encoder *encoder,
                        struct drm_connector_state *conn_state)
 {
        const struct tv_mode *tv_mode = intel_tv_mode_find(conn_state);
+       struct drm_display_mode *adjusted_mode =
+               &pipe_config->base.adjusted_mode;
 
        if (!tv_mode)
                return false;
 
-       pipe_config->base.adjusted_mode.crtc_clock = tv_mode->clock;
+       if (adjusted_mode->flags & DRM_MODE_FLAG_DBLSCAN)
+               return false;
+
+       adjusted_mode->crtc_clock = tv_mode->clock;
        DRM_DEBUG_KMS("forcing bpc to 8 for TV\n");
        pipe_config->pipe_bpp = 8*3;
 
        /* TV has it's own notion of sync and other mode flags, so clear them. */
-       pipe_config->base.adjusted_mode.flags = 0;
+       adjusted_mode->flags = 0;
 
        /*
         * FIXME: We don't check whether the input mode is actually what we want
index 291c081..397143b 100644 (file)
@@ -132,7 +132,7 @@ curs507a_new_(const struct nv50_wimm_func *func, struct nouveau_drm *drm,
 
        nvif_object_map(&wndw->wimm.base.user, NULL, 0);
        wndw->immd = func;
-       wndw->ctxdma.parent = &disp->core->chan.base.user;
+       wndw->ctxdma.parent = NULL;
        return 0;
 }
 
index 224963b..c5a9bc1 100644 (file)
@@ -444,14 +444,17 @@ nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
        if (ret)
                return ret;
 
-       ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
-       if (IS_ERR(ctxdma)) {
-               nouveau_bo_unpin(fb->nvbo);
-               return PTR_ERR(ctxdma);
+       if (wndw->ctxdma.parent) {
+               ctxdma = nv50_wndw_ctxdma_new(wndw, fb);
+               if (IS_ERR(ctxdma)) {
+                       nouveau_bo_unpin(fb->nvbo);
+                       return PTR_ERR(ctxdma);
+               }
+
+               asyw->image.handle[0] = ctxdma->object.handle;
        }
 
        asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv);
-       asyw->image.handle[0] = ctxdma->object.handle;
        asyw->image.offset[0] = fb->nvbo->bo.offset;
 
        if (wndw->func->prepare) {
index b8cda94..768207f 100644 (file)
@@ -623,7 +623,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
        struct qxl_cursor_cmd *cmd;
        struct qxl_cursor *cursor;
        struct drm_gem_object *obj;
-       struct qxl_bo *cursor_bo = NULL, *user_bo = NULL;
+       struct qxl_bo *cursor_bo = NULL, *user_bo = NULL, *old_cursor_bo = NULL;
        int ret;
        void *user_ptr;
        int size = 64*64*4;
@@ -677,7 +677,7 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
                                                           cursor_bo, 0);
                cmd->type = QXL_CURSOR_SET;
 
-               qxl_bo_unref(&qcrtc->cursor_bo);
+               old_cursor_bo = qcrtc->cursor_bo;
                qcrtc->cursor_bo = cursor_bo;
                cursor_bo = NULL;
        } else {
@@ -697,6 +697,9 @@ static void qxl_cursor_atomic_update(struct drm_plane *plane,
        qxl_push_cursor_ring_release(qdev, release, QXL_CMD_CURSOR, false);
        qxl_release_fence_buffer_objects(release);
 
+       if (old_cursor_bo)
+               qxl_bo_unref(&old_cursor_bo);
+
        qxl_bo_unref(&cursor_bo);
 
        return;
index 08747fc..8232b39 100644 (file)
@@ -17,7 +17,6 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_modes.h>
 #include <drm/drm_of.h>
-#include <drm/drm_panel.h>
 
 #include <uapi/drm/drm_mode.h>
 
@@ -418,9 +417,6 @@ static void sun4i_tcon0_mode_set_lvds(struct sun4i_tcon *tcon,
 static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
                                     const struct drm_display_mode *mode)
 {
-       struct drm_panel *panel = tcon->panel;
-       struct drm_connector *connector = panel->connector;
-       struct drm_display_info display_info = connector->display_info;
        unsigned int bp, hsync, vsync;
        u8 clk_delay;
        u32 val = 0;
@@ -478,27 +474,6 @@ static void sun4i_tcon0_mode_set_rgb(struct sun4i_tcon *tcon,
        if (mode->flags & DRM_MODE_FLAG_PVSYNC)
                val |= SUN4I_TCON0_IO_POL_VSYNC_POSITIVE;
 
-       /*
-        * On A20 and similar SoCs, the only way to achieve Positive Edge
-        * (Rising Edge), is setting dclk clock phase to 2/3(240°).
-        * By default TCON works in Negative Edge(Falling Edge),
-        * this is why phase is set to 0 in that case.
-        * Unfortunately there's no way to logically invert dclk through
-        * IO_POL register.
-        * The only acceptable way to work, triple checked with scope,
-        * is using clock phase set to 0° for Negative Edge and set to 240°
-        * for Positive Edge.
-        * On A33 and similar SoCs there would be a 90° phase option,
-        * but it divides also dclk by 2.
-        * Following code is a way to avoid quirks all around TCON
-        * and DOTCLOCK drivers.
-        */
-       if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_POSEDGE)
-               clk_set_phase(tcon->dclk, 240);
-
-       if (display_info.bus_flags & DRM_BUS_FLAG_PIXDATA_NEGEDGE)
-               clk_set_phase(tcon->dclk, 0);
-
        regmap_update_bits(tcon->regs, SUN4I_TCON0_IO_POL_REG,
                           SUN4I_TCON0_IO_POL_HSYNC_POSITIVE | SUN4I_TCON0_IO_POL_VSYNC_POSITIVE,
                           val);
index 7b8e17b..6bf4da7 100644 (file)
@@ -124,6 +124,8 @@ static const struct hid_device_id hammer_devices[] = {
                     USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_STAFF) },
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WAND) },
+       { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
+                    USB_VENDOR_ID_GOOGLE, USB_DEVICE_ID_GOOGLE_WHISKERS) },
        { }
 };
 MODULE_DEVICE_TABLE(hid, hammer_devices);
index a85634f..c7981dd 100644 (file)
 #define USB_DEVICE_ID_GOOGLE_TOUCH_ROSE        0x5028
 #define USB_DEVICE_ID_GOOGLE_STAFF     0x502b
 #define USB_DEVICE_ID_GOOGLE_WAND      0x502d
+#define USB_DEVICE_ID_GOOGLE_WHISKERS  0x5030
 
 #define USB_VENDOR_ID_GOTOP            0x08f2
 #define USB_DEVICE_ID_SUPER_Q2         0x007f
index cb86cc8..0422ec2 100644 (file)
@@ -573,7 +573,7 @@ static bool steam_is_valve_interface(struct hid_device *hdev)
 
 static int steam_client_ll_parse(struct hid_device *hdev)
 {
-       struct steam_device *steam = hid_get_drvdata(hdev);
+       struct steam_device *steam = hdev->driver_data;
 
        return hid_parse_report(hdev, steam->hdev->dev_rdesc,
                        steam->hdev->dev_rsize);
@@ -590,7 +590,7 @@ static void steam_client_ll_stop(struct hid_device *hdev)
 
 static int steam_client_ll_open(struct hid_device *hdev)
 {
-       struct steam_device *steam = hid_get_drvdata(hdev);
+       struct steam_device *steam = hdev->driver_data;
        int ret;
 
        ret = hid_hw_open(steam->hdev);
@@ -605,7 +605,7 @@ static int steam_client_ll_open(struct hid_device *hdev)
 
 static void steam_client_ll_close(struct hid_device *hdev)
 {
-       struct steam_device *steam = hid_get_drvdata(hdev);
+       struct steam_device *steam = hdev->driver_data;
 
        mutex_lock(&steam->mutex);
        steam->client_opened = false;
@@ -623,7 +623,7 @@ static int steam_client_ll_raw_request(struct hid_device *hdev,
                                size_t count, unsigned char report_type,
                                int reqtype)
 {
-       struct steam_device *steam = hid_get_drvdata(hdev);
+       struct steam_device *steam = hdev->driver_data;
 
        return hid_hw_raw_request(steam->hdev, reportnum, buf, count,
                        report_type, reqtype);
@@ -710,7 +710,7 @@ static int steam_probe(struct hid_device *hdev,
                ret = PTR_ERR(steam->client_hdev);
                goto client_hdev_fail;
        }
-       hid_set_drvdata(steam->client_hdev, steam);
+       steam->client_hdev->driver_data = steam;
 
        /*
         * With the real steam controller interface, do not connect hidraw.
index 582e449..a2c53ea 100644 (file)
@@ -205,8 +205,7 @@ static void ish_remove(struct pci_dev *pdev)
        kfree(ishtp_dev);
 }
 
-#ifdef CONFIG_PM
-static struct device *ish_resume_device;
+static struct device __maybe_unused *ish_resume_device;
 
 /* 50ms to get resume response */
 #define WAIT_FOR_RESUME_ACK_MS         50
@@ -220,7 +219,7 @@ static struct device *ish_resume_device;
  * in that case a simple resume message is enough, others we need
  * a reset sequence.
  */
-static void ish_resume_handler(struct work_struct *work)
+static void __maybe_unused ish_resume_handler(struct work_struct *work)
 {
        struct pci_dev *pdev = to_pci_dev(ish_resume_device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -262,7 +261,7 @@ static void ish_resume_handler(struct work_struct *work)
  *
  * Return: 0 to the pm core
  */
-static int ish_suspend(struct device *device)
+static int __maybe_unused ish_suspend(struct device *device)
 {
        struct pci_dev *pdev = to_pci_dev(device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -288,7 +287,7 @@ static int ish_suspend(struct device *device)
        return 0;
 }
 
-static DECLARE_WORK(resume_work, ish_resume_handler);
+static __maybe_unused DECLARE_WORK(resume_work, ish_resume_handler);
 /**
  * ish_resume() - ISH resume callback
  * @device:    device pointer
@@ -297,7 +296,7 @@ static DECLARE_WORK(resume_work, ish_resume_handler);
  *
  * Return: 0 to the pm core
  */
-static int ish_resume(struct device *device)
+static int __maybe_unused ish_resume(struct device *device)
 {
        struct pci_dev *pdev = to_pci_dev(device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
@@ -311,21 +310,14 @@ static int ish_resume(struct device *device)
        return 0;
 }
 
-static const struct dev_pm_ops ish_pm_ops = {
-       .suspend = ish_suspend,
-       .resume = ish_resume,
-};
-#define ISHTP_ISH_PM_OPS       (&ish_pm_ops)
-#else
-#define ISHTP_ISH_PM_OPS       NULL
-#endif /* CONFIG_PM */
+static SIMPLE_DEV_PM_OPS(ish_pm_ops, ish_suspend, ish_resume);
 
 static struct pci_driver ish_driver = {
        .name = KBUILD_MODNAME,
        .id_table = ish_pci_tbl,
        .probe = ish_probe,
        .remove = ish_remove,
-       .driver.pm = ISHTP_ISH_PM_OPS,
+       .driver.pm = &ish_pm_ops,
 };
 
 module_pci_driver(ish_driver);
index c101369..d679753 100644 (file)
@@ -395,6 +395,14 @@ static void wacom_usage_mapping(struct hid_device *hdev,
                }
        }
 
+       /* 2nd-generation Intuos Pro Large has incorrect Y maximum */
+       if (hdev->vendor == USB_VENDOR_ID_WACOM &&
+           hdev->product == 0x0358 &&
+           WACOM_PEN_FIELD(field) &&
+           wacom_equivalent_usage(usage->hid) == HID_GD_Y) {
+               field->logical_maximum = 43200;
+       }
+
        switch (usage->hid) {
        case HID_GD_X:
                features->x_max = field->logical_maximum;
index bf3bb7e..9d3ef87 100644 (file)
@@ -1074,6 +1074,13 @@ static struct dmi_system_id i8k_blacklist_fan_support_dmi_table[] __initdata = {
                        DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Vostro 3360"),
                },
        },
+       {
+               .ident = "Dell XPS13 9333",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+                       DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "XPS13 9333"),
+               },
+       },
        { }
 };
 
index 155d4d1..f9d1349 100644 (file)
@@ -4175,7 +4175,7 @@ static int nct6775_probe(struct platform_device *pdev)
         * The temperature is already monitored if the respective bit in <mask>
         * is set.
         */
-       for (i = 0; i < 32; i++) {
+       for (i = 0; i < 31; i++) {
                if (!(data->temp_mask & BIT(i + 1)))
                        continue;
                if (!reg_temp_alternate[i])
index 3ae2339..2094d13 100644 (file)
@@ -736,10 +736,6 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
        if (ret)
                return ret;
 
-       if (!file->ucontext &&
-           (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended))
-               return -EINVAL;
-
        if (extended) {
                if (count < (sizeof(hdr) + sizeof(ex_hdr)))
                        return -EINVAL;
@@ -759,6 +755,16 @@ static ssize_t ib_uverbs_write(struct file *filp, const char __user *buf,
                goto out;
        }
 
+       /*
+        * Must be after the ib_dev check, as once the RCU clears ib_dev ==
+        * NULL means ucontext == NULL
+        */
+       if (!file->ucontext &&
+           (command != IB_USER_VERBS_CMD_GET_CONTEXT || extended)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (!verify_command_mask(ib_dev, command, extended)) {
                ret = -EOPNOTSUPP;
                goto out;
index 0b56828..9d6beb9 100644 (file)
@@ -1562,11 +1562,12 @@ EXPORT_SYMBOL(ib_destroy_qp);
 
 /* Completion queues */
 
-struct ib_cq *ib_create_cq(struct ib_device *device,
-                          ib_comp_handler comp_handler,
-                          void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context,
-                          const struct ib_cq_init_attr *cq_attr)
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+                            ib_comp_handler comp_handler,
+                            void (*event_handler)(struct ib_event *, void *),
+                            void *cq_context,
+                            const struct ib_cq_init_attr *cq_attr,
+                            const char *caller)
 {
        struct ib_cq *cq;
 
@@ -1580,12 +1581,13 @@ struct ib_cq *ib_create_cq(struct ib_device *device,
                cq->cq_context    = cq_context;
                atomic_set(&cq->usecnt, 0);
                cq->res.type = RDMA_RESTRACK_CQ;
+               cq->res.kern_name = caller;
                rdma_restrack_add(&cq->res);
        }
 
        return cq;
 }
-EXPORT_SYMBOL(ib_create_cq);
+EXPORT_SYMBOL(__ib_create_cq);
 
 int rdma_set_cq_moderation(struct ib_cq *cq, u16 cq_count, u16 cq_period)
 {
index ed1f253..c7c85c2 100644 (file)
@@ -486,8 +486,11 @@ int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
        }
 
        if (flags & IB_MR_REREG_ACCESS) {
-               if (ib_access_writable(mr_access_flags) && !mmr->umem->writable)
-                       return -EPERM;
+               if (ib_access_writable(mr_access_flags) &&
+                   !mmr->umem->writable) {
+                       err = -EPERM;
+                       goto release_mpt_entry;
+               }
 
                err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
                                               convert_access(mr_access_flags));
index e52dd21..e3e330f 100644 (file)
@@ -3199,8 +3199,8 @@ static int flow_counters_set_data(struct ib_counters *ibcounters,
        if (!mcounters->hw_cntrs_hndl) {
                mcounters->hw_cntrs_hndl = mlx5_fc_create(
                        to_mdev(ibcounters->device)->mdev, false);
-               if (!mcounters->hw_cntrs_hndl) {
-                       ret = -ENOMEM;
+               if (IS_ERR(mcounters->hw_cntrs_hndl)) {
+                       ret = PTR_ERR(mcounters->hw_cntrs_hndl);
                        goto free;
                }
                hw_hndl = true;
@@ -3546,29 +3546,35 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
                        return ERR_PTR(-ENOMEM);
 
                err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
-               if (err) {
-                       kfree(ucmd);
-                       return ERR_PTR(err);
-               }
+               if (err)
+                       goto free_ucmd;
        }
 
-       if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
-               return ERR_PTR(-ENOMEM);
+       if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
+               err = -ENOMEM;
+               goto free_ucmd;
+       }
 
        if (domain != IB_FLOW_DOMAIN_USER ||
            flow_attr->port > dev->num_ports ||
            (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
-                                 IB_FLOW_ATTR_FLAGS_EGRESS)))
-               return ERR_PTR(-EINVAL);
+                                 IB_FLOW_ATTR_FLAGS_EGRESS))) {
+               err = -EINVAL;
+               goto free_ucmd;
+       }
 
        if (is_egress &&
            (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-            flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT))
-               return ERR_PTR(-EINVAL);
+            flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+               err = -EINVAL;
+               goto free_ucmd;
+       }
 
        dst = kzalloc(sizeof(*dst), GFP_KERNEL);
-       if (!dst)
-               return ERR_PTR(-ENOMEM);
+       if (!dst) {
+               err = -ENOMEM;
+               goto free_ucmd;
+       }
 
        mutex_lock(&dev->flow_db->lock);
 
@@ -3637,8 +3643,8 @@ destroy_ft:
 unlock:
        mutex_unlock(&dev->flow_db->lock);
        kfree(dst);
+free_ucmd:
        kfree(ucmd);
-       kfree(handler);
        return ERR_PTR(err);
 }
 
index f7ac8fc..f07b8df 100644 (file)
@@ -1957,6 +1957,9 @@ int qedr_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
        }
 
        if (attr_mask & (IB_QP_AV | IB_QP_PATH_MTU)) {
+               if (rdma_protocol_iwarp(&dev->ibdev, 1))
+                       return -EINVAL;
+
                if (attr_mask & IB_QP_PATH_MTU) {
                        if (attr->path_mtu < IB_MTU_256 ||
                            attr->path_mtu > IB_MTU_4096) {
index f30eeba..8be2723 100644 (file)
@@ -645,6 +645,9 @@ next_wqe:
                } else {
                        goto exit;
                }
+               if ((wqe->wr.send_flags & IB_SEND_SIGNALED) ||
+                   qp->sq_sig_type == IB_SIGNAL_ALL_WR)
+                       rxe_run_task(&qp->comp.task, 1);
                qp->req.wqe_index = next_index(qp->sq.queue,
                                                qp->req.wqe_index);
                goto next_wqe;
@@ -709,6 +712,7 @@ next_wqe:
 
        if (fill_packet(qp, wqe, &pkt, skb, payload)) {
                pr_debug("qp#%d Error during fill packet\n", qp_num(qp));
+               kfree_skb(skb);
                goto err;
        }
 
@@ -740,7 +744,6 @@ next_wqe:
        goto next_wqe;
 
 err:
-       kfree_skb(skb);
        wqe->status = IB_WC_LOC_PROT_ERR;
        wqe->state = wqe_state_error;
        __rxe_do_task(&qp->comp.task);
index 0f52d44..f5fe010 100644 (file)
@@ -199,7 +199,7 @@ static int gicv2m_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
 
 fail:
        irq_domain_free_irqs_parent(domain, virq, nr_irqs);
-       gicv2m_unalloc_msi(v2m, hwirq, get_count_order(nr_irqs));
+       gicv2m_unalloc_msi(v2m, hwirq, nr_irqs);
        return err;
 }
 
index 5377d7e..d7842d3 100644 (file)
@@ -182,6 +182,22 @@ static struct its_collection *dev_event_to_col(struct its_device *its_dev,
        return its->collections + its_dev->event_map.col_map[event];
 }
 
+static struct its_collection *valid_col(struct its_collection *col)
+{
+       if (WARN_ON_ONCE(col->target_address & GENMASK_ULL(0, 15)))
+               return NULL;
+
+       return col;
+}
+
+static struct its_vpe *valid_vpe(struct its_node *its, struct its_vpe *vpe)
+{
+       if (valid_col(its->collections + vpe->col_idx))
+               return vpe;
+
+       return NULL;
+}
+
 /*
  * ITS command descriptors - parameters to be encoded in a command
  * block.
@@ -439,7 +455,7 @@ static struct its_collection *its_build_mapti_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_movi_cmd(struct its_node *its,
@@ -458,7 +474,7 @@ static struct its_collection *its_build_movi_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_discard_cmd(struct its_node *its,
@@ -476,7 +492,7 @@ static struct its_collection *its_build_discard_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_inv_cmd(struct its_node *its,
@@ -494,7 +510,7 @@ static struct its_collection *its_build_inv_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_int_cmd(struct its_node *its,
@@ -512,7 +528,7 @@ static struct its_collection *its_build_int_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_clear_cmd(struct its_node *its,
@@ -530,7 +546,7 @@ static struct its_collection *its_build_clear_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return col;
+       return valid_col(col);
 }
 
 static struct its_collection *its_build_invall_cmd(struct its_node *its,
@@ -554,7 +570,7 @@ static struct its_vpe *its_build_vinvall_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return desc->its_vinvall_cmd.vpe;
+       return valid_vpe(its, desc->its_vinvall_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
@@ -576,7 +592,7 @@ static struct its_vpe *its_build_vmapp_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return desc->its_vmapp_cmd.vpe;
+       return valid_vpe(its, desc->its_vmapp_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
@@ -599,7 +615,7 @@ static struct its_vpe *its_build_vmapti_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return desc->its_vmapti_cmd.vpe;
+       return valid_vpe(its, desc->its_vmapti_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmovi_cmd(struct its_node *its,
@@ -622,7 +638,7 @@ static struct its_vpe *its_build_vmovi_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return desc->its_vmovi_cmd.vpe;
+       return valid_vpe(its, desc->its_vmovi_cmd.vpe);
 }
 
 static struct its_vpe *its_build_vmovp_cmd(struct its_node *its,
@@ -640,7 +656,7 @@ static struct its_vpe *its_build_vmovp_cmd(struct its_node *its,
 
        its_fixup_cmd(cmd);
 
-       return desc->its_vmovp_cmd.vpe;
+       return valid_vpe(its, desc->its_vmovp_cmd.vpe);
 }
 
 static u64 its_cmd_ptr_to_offset(struct its_node *its,
@@ -1824,11 +1840,16 @@ static int its_alloc_tables(struct its_node *its)
 
 static int its_alloc_collections(struct its_node *its)
 {
+       int i;
+
        its->collections = kcalloc(nr_cpu_ids, sizeof(*its->collections),
                                   GFP_KERNEL);
        if (!its->collections)
                return -ENOMEM;
 
+       for (i = 0; i < nr_cpu_ids; i++)
+               its->collections[i].target_address = ~0ULL;
+
        return 0;
 }
 
@@ -2310,7 +2331,14 @@ static int its_irq_domain_activate(struct irq_domain *domain,
                cpu_mask = cpumask_of_node(its_dev->its->numa_node);
 
        /* Bind the LPI to the first possible CPU */
-       cpu = cpumask_first(cpu_mask);
+       cpu = cpumask_first_and(cpu_mask, cpu_online_mask);
+       if (cpu >= nr_cpu_ids) {
+               if (its_dev->its->flags & ITS_FLAGS_WORKAROUND_CAVIUM_23144)
+                       return -EINVAL;
+
+               cpu = cpumask_first(cpu_online_mask);
+       }
+
        its_dev->event_map.col_map[event] = cpu;
        irq_data_update_effective_affinity(d, cpumask_of(cpu));
 
@@ -3399,6 +3427,16 @@ static int redist_disable_lpis(void)
        u64 timeout = USEC_PER_SEC;
        u64 val;
 
+       /*
+        * If coming via a CPU hotplug event, we don't need to disable
+        * LPIs before trying to re-enable them. They are already
+        * configured and all is well in the world. Detect this case
+        * by checking the allocation of the pending table for the
+        * current CPU.
+        */
+       if (gic_data_rdist()->pend_page)
+               return 0;
+
        if (!gic_rdists_supports_plpis()) {
                pr_info("CPU%d: LPIs not supported\n", smp_processor_id());
                return -ENXIO;
index 1ec3bfe..c671b32 100644 (file)
@@ -93,8 +93,12 @@ static void ls_scfg_msi_compose_msg(struct irq_data *data, struct msi_msg *msg)
        msg->address_lo = lower_32_bits(msi_data->msiir_addr);
        msg->data = data->hwirq;
 
-       if (msi_affinity_flag)
-               msg->data |= cpumask_first(data->common->affinity);
+       if (msi_affinity_flag) {
+               const struct cpumask *mask;
+
+               mask = irq_data_get_effective_affinity_mask(data);
+               msg->data |= cpumask_first(mask);
+       }
 
        iommu_dma_map_msi_msg(data->irq, msg);
 }
@@ -121,7 +125,7 @@ static int ls_scfg_msi_set_affinity(struct irq_data *irq_data,
                return -EINVAL;
        }
 
-       cpumask_copy(irq_data->common->affinity, mask);
+       irq_data_update_effective_affinity(irq_data, cpumask_of(cpu));
 
        return IRQ_SET_MASK_OK;
 }
index 10c0898..9c03f35 100644 (file)
@@ -4,7 +4,7 @@
 
 menuconfig NVM
        bool "Open-Channel SSD target support"
-       depends on BLOCK && HAS_DMA && PCI
+       depends on BLOCK && PCI
        select BLK_DEV_NVME
        help
          Say Y here to get to enable Open-channel SSDs.
index 973c1fb..99038df 100644 (file)
@@ -79,7 +79,6 @@ void enic_rfs_flw_tbl_init(struct enic *enic)
        enic->rfs_h.max = enic->config.num_arfs;
        enic->rfs_h.free = enic->rfs_h.max;
        enic->rfs_h.toclean = 0;
-       enic_rfs_timer_start(enic);
 }
 
 void enic_rfs_flw_tbl_free(struct enic *enic)
@@ -88,7 +87,6 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
 
        enic_rfs_timer_stop(enic);
        spin_lock_bh(&enic->rfs_h.lock);
-       enic->rfs_h.free = 0;
        for (i = 0; i < (1 << ENIC_RFS_FLW_BITSHIFT); i++) {
                struct hlist_head *hhead;
                struct hlist_node *tmp;
@@ -99,6 +97,7 @@ void enic_rfs_flw_tbl_free(struct enic *enic)
                        enic_delfltr(enic, n->fltr_id);
                        hlist_del(&n->node);
                        kfree(n);
+                       enic->rfs_h.free++;
                }
        }
        spin_unlock_bh(&enic->rfs_h.lock);
index 30d2eaa..90c645b 100644 (file)
@@ -1920,7 +1920,7 @@ static int enic_open(struct net_device *netdev)
 {
        struct enic *enic = netdev_priv(netdev);
        unsigned int i;
-       int err;
+       int err, ret;
 
        err = enic_request_intr(enic);
        if (err) {
@@ -1971,16 +1971,15 @@ static int enic_open(struct net_device *netdev)
                vnic_intr_unmask(&enic->intr[i]);
 
        enic_notify_timer_start(enic);
-       enic_rfs_flw_tbl_init(enic);
+       enic_rfs_timer_start(enic);
 
        return 0;
 
 err_out_free_rq:
        for (i = 0; i < enic->rq_count; i++) {
-               err = vnic_rq_disable(&enic->rq[i]);
-               if (err)
-                       return err;
-               vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
+               ret = vnic_rq_disable(&enic->rq[i]);
+               if (!ret)
+                       vnic_rq_clean(&enic->rq[i], enic_free_rq_buf);
        }
        enic_dev_notify_unset(enic);
 err_out_free_intr:
@@ -2904,6 +2903,7 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        timer_setup(&enic->notify_timer, enic_notify_timer, 0);
 
+       enic_rfs_flw_tbl_init(enic);
        enic_set_rx_coal_setting(enic);
        INIT_WORK(&enic->reset, enic_reset);
        INIT_WORK(&enic->tx_hang_reset, enic_tx_hang_reset);
index 78db8e6..ed6c76d 100644 (file)
@@ -1735,8 +1735,8 @@ static void ftgmac100_ncsi_handler(struct ncsi_dev *nd)
        if (unlikely(nd->state != ncsi_dev_state_functional))
                return;
 
-       netdev_info(nd->dev, "NCSI interface %s\n",
-                   nd->link_up ? "up" : "down");
+       netdev_dbg(nd->dev, "NCSI interface %s\n",
+                  nd->link_up ? "up" : "down");
 }
 
 static void ftgmac100_setup_clk(struct ftgmac100 *priv)
index 8ffb745..ed6dbcf 100644 (file)
@@ -2103,9 +2103,8 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
        unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
 #else
        unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
-                               SKB_DATA_ALIGN(I40E_SKB_PAD +
-                                              (xdp->data_end -
-                                               xdp->data_hard_start));
+                               SKB_DATA_ALIGN(xdp->data_end -
+                                              xdp->data_hard_start);
 #endif
        struct sk_buff *skb;
 
@@ -2124,7 +2123,7 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring,
                return NULL;
 
        /* update pointers within the skb to store the data */
-       skb_reserve(skb, I40E_SKB_PAD + (xdp->data - xdp->data_hard_start));
+       skb_reserve(skb, xdp->data - xdp->data_hard_start);
        __skb_put(skb, xdp->data_end - xdp->data);
        if (metasize)
                skb_metadata_set(skb, metasize);
index 8f31406..f0b0138 100644 (file)
@@ -255,9 +255,8 @@ qed_dcbx_get_app_protocol_type(struct qed_hwfn *p_hwfn,
                *type = DCBX_PROTOCOL_ROCE_V2;
        } else {
                *type = DCBX_MAX_PROTOCOL_TYPE;
-               DP_ERR(p_hwfn,
-                      "No action required, App TLV id = 0x%x app_prio_bitmap = 0x%x\n",
-                      id, app_prio_bitmap);
+               DP_ERR(p_hwfn, "No action required, App TLV entry = 0x%x\n",
+                      app_prio_bitmap);
                return false;
        }
 
@@ -1479,8 +1478,8 @@ static u8 qed_dcbnl_getcap(struct qed_dev *cdev, int capid, u8 *cap)
                *cap = 0x80;
                break;
        case DCB_CAP_ATTR_DCBX:
-               *cap = (DCB_CAP_DCBX_LLD_MANAGED | DCB_CAP_DCBX_VER_CEE |
-                       DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_STATIC);
+               *cap = (DCB_CAP_DCBX_VER_CEE | DCB_CAP_DCBX_VER_IEEE |
+                       DCB_CAP_DCBX_STATIC);
                break;
        default:
                *cap = false;
@@ -1548,8 +1547,6 @@ static u8 qed_dcbnl_getdcbx(struct qed_dev *cdev)
        if (!dcbx_info)
                return 0;
 
-       if (dcbx_info->operational.enabled)
-               mode |= DCB_CAP_DCBX_LLD_MANAGED;
        if (dcbx_info->operational.ieee)
                mode |= DCB_CAP_DCBX_VER_IEEE;
        if (dcbx_info->operational.cee)
index c97ebd6..012973d 100644 (file)
@@ -201,8 +201,9 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data)
 
        skb = build_skb(buffer->data, 0);
        if (!skb) {
-               rc = -ENOMEM;
-               goto out_post;
+               DP_INFO(cdev, "Failed to build SKB\n");
+               kfree(buffer->data);
+               goto out_post1;
        }
 
        data->u.placement_offset += NET_SKB_PAD;
@@ -224,8 +225,14 @@ void qed_ll2b_complete_rx_packet(void *cxt, struct qed_ll2_comp_rx_data *data)
                cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb,
                                      data->opaque_data_0,
                                      data->opaque_data_1);
+       } else {
+               DP_VERBOSE(p_hwfn, (NETIF_MSG_RX_STATUS | NETIF_MSG_PKTDATA |
+                                   QED_MSG_LL2 | QED_MSG_STORAGE),
+                          "Dropping the packet\n");
+               kfree(buffer->data);
        }
 
+out_post1:
        /* Update Buffer information and update FW producer */
        buffer->data = new_data;
        buffer->phys_addr = new_phys_addr;
index b04d57c..5c10fd7 100644 (file)
@@ -567,8 +567,16 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance)
                /* Fastpath interrupts */
                for (j = 0; j < 64; j++) {
                        if ((0x2ULL << j) & status) {
-                               hwfn->simd_proto_handler[j].func(
-                                       hwfn->simd_proto_handler[j].token);
+                               struct qed_simd_fp_handler *p_handler =
+                                       &hwfn->simd_proto_handler[j];
+
+                               if (p_handler->func)
+                                       p_handler->func(p_handler->token);
+                               else
+                                       DP_NOTICE(hwfn,
+                                                 "Not calling fastpath handler as it is NULL [handler #%d, status 0x%llx]\n",
+                                                 j, status);
+
                                status &= ~(0x2ULL << j);
                                rc = IRQ_HANDLED;
                        }
index cb5b0f5..edf2036 100644 (file)
@@ -111,7 +111,7 @@ config DWMAC_ROCKCHIP
 config DWMAC_SOCFPGA
        tristate "SOCFPGA dwmac support"
        default ARCH_SOCFPGA
-       depends on OF && (ARCH_SOCFPGA || COMPILE_TEST)
+       depends on OF && (ARCH_SOCFPGA || ARCH_STRATIX10 || COMPILE_TEST)
        select MFD_SYSCON
        help
          Support for ethernet controller on Altera SOCFPGA
index 6e35957..5b3b06a 100644 (file)
@@ -55,6 +55,7 @@ struct socfpga_dwmac {
        struct  device *dev;
        struct regmap *sys_mgr_base_addr;
        struct reset_control *stmmac_rst;
+       struct reset_control *stmmac_ocp_rst;
        void __iomem *splitter_base;
        bool f2h_ptp_ref_clk;
        struct tse_pcs pcs;
@@ -262,8 +263,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
                val = SYSMGR_EMACGRP_CTRL_PHYSEL_ENUM_GMII_MII;
 
        /* Assert reset to the enet controller before changing the phy mode */
-       if (dwmac->stmmac_rst)
-               reset_control_assert(dwmac->stmmac_rst);
+       reset_control_assert(dwmac->stmmac_ocp_rst);
+       reset_control_assert(dwmac->stmmac_rst);
 
        regmap_read(sys_mgr_base_addr, reg_offset, &ctrl);
        ctrl &= ~(SYSMGR_EMACGRP_CTRL_PHYSEL_MASK << reg_shift);
@@ -288,8 +289,8 @@ static int socfpga_dwmac_set_phy_mode(struct socfpga_dwmac *dwmac)
        /* Deassert reset for the phy configuration to be sampled by
         * the enet controller, and operation to start in requested mode
         */
-       if (dwmac->stmmac_rst)
-               reset_control_deassert(dwmac->stmmac_rst);
+       reset_control_deassert(dwmac->stmmac_ocp_rst);
+       reset_control_deassert(dwmac->stmmac_rst);
        if (phymode == PHY_INTERFACE_MODE_SGMII) {
                if (tse_pcs_init(dwmac->pcs.tse_pcs_base, &dwmac->pcs) != 0) {
                        dev_err(dwmac->dev, "Unable to initialize TSE PCS");
@@ -324,6 +325,15 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
                goto err_remove_config_dt;
        }
 
+       dwmac->stmmac_ocp_rst = devm_reset_control_get_optional(dev, "stmmaceth-ocp");
+       if (IS_ERR(dwmac->stmmac_ocp_rst)) {
+               ret = PTR_ERR(dwmac->stmmac_ocp_rst);
+               dev_err(dev, "error getting reset control of ocp %d\n", ret);
+               goto err_remove_config_dt;
+       }
+
+       reset_control_deassert(dwmac->stmmac_ocp_rst);
+
        ret = socfpga_dwmac_parse_data(dwmac, dev);
        if (ret) {
                dev_err(dev, "Unable to parse OF data\n");
index e79b0d7..cba46b6 100644 (file)
@@ -928,6 +928,7 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
 static int stmmac_init_phy(struct net_device *dev)
 {
        struct stmmac_priv *priv = netdev_priv(dev);
+       u32 tx_cnt = priv->plat->tx_queues_to_use;
        struct phy_device *phydev;
        char phy_id_fmt[MII_BUS_ID_SIZE + 3];
        char bus_id[MII_BUS_ID_SIZE];
@@ -968,6 +969,15 @@ static int stmmac_init_phy(struct net_device *dev)
                phydev->advertising &= ~(SUPPORTED_1000baseT_Half |
                                         SUPPORTED_1000baseT_Full);
 
+       /*
+        * Half-duplex mode not supported with multiqueue
+        * half-duplex can only works with single queue
+        */
+       if (tx_cnt > 1)
+               phydev->supported &= ~(SUPPORTED_1000baseT_Half |
+                                      SUPPORTED_100baseT_Half |
+                                      SUPPORTED_10baseT_Half);
+
        /*
         * Broken HW is sometimes missing the pull-up resistor on the
         * MDIO line, which results in reads to non-existent devices returning
index 7a16d40..b9221fc 100644 (file)
@@ -60,8 +60,7 @@
 #include <linux/sungem_phy.h>
 #include "sungem.h"
 
-/* Stripping FCS is causing problems, disabled for now */
-#undef STRIP_FCS
+#define STRIP_FCS
 
 #define DEFAULT_MSG    (NETIF_MSG_DRV          | \
                         NETIF_MSG_PROBE        | \
@@ -435,7 +434,7 @@ static int gem_rxmac_reset(struct gem *gp)
        writel(desc_dma & 0xffffffff, gp->regs + RXDMA_DBLOW);
        writel(RX_RING_SIZE - 4, gp->regs + RXDMA_KICK);
        val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) |
-              ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128);
+              (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128);
        writel(val, gp->regs + RXDMA_CFG);
        if (readl(gp->regs + GREG_BIFCFG) & GREG_BIFCFG_M66EN)
                writel(((5 & RXDMA_BLANK_IPKTS) |
@@ -760,7 +759,6 @@ static int gem_rx(struct gem *gp, int work_to_do)
        struct net_device *dev = gp->dev;
        int entry, drops, work_done = 0;
        u32 done;
-       __sum16 csum;
 
        if (netif_msg_rx_status(gp))
                printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n",
@@ -855,9 +853,13 @@ static int gem_rx(struct gem *gp, int work_to_do)
                        skb = copy_skb;
                }
 
-               csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff);
-               skb->csum = csum_unfold(csum);
-               skb->ip_summed = CHECKSUM_COMPLETE;
+               if (likely(dev->features & NETIF_F_RXCSUM)) {
+                       __sum16 csum;
+
+                       csum = (__force __sum16)htons((status & RXDCTRL_TCPCSUM) ^ 0xffff);
+                       skb->csum = csum_unfold(csum);
+                       skb->ip_summed = CHECKSUM_COMPLETE;
+               }
                skb->protocol = eth_type_trans(skb, gp->dev);
 
                napi_gro_receive(&gp->napi, skb);
@@ -1761,7 +1763,7 @@ static void gem_init_dma(struct gem *gp)
        writel(0, gp->regs + TXDMA_KICK);
 
        val = (RXDMA_CFG_BASE | (RX_OFFSET << 10) |
-              ((14 / 2) << 13) | RXDMA_CFG_FTHRESH_128);
+              (ETH_HLEN << 13) | RXDMA_CFG_FTHRESH_128);
        writel(val, gp->regs + RXDMA_CFG);
 
        writel(desc_dma >> 32, gp->regs + RXDMA_DBHI);
@@ -2985,8 +2987,8 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_drvdata(pdev, dev);
 
        /* We can do scatter/gather and HW checksum */
-       dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM;
-       dev->features |= dev->hw_features | NETIF_F_RXCSUM;
+       dev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+       dev->features = dev->hw_features;
        if (pci_using_dac)
                dev->features |= NETIF_F_HIGHDMA;
 
index 06d7c9e..a1a6445 100644 (file)
@@ -1385,6 +1385,11 @@ static int emac_devioctl(struct net_device *ndev, struct ifreq *ifrq, int cmd)
                return -EOPNOTSUPP;
 }
 
+static int match_first_device(struct device *dev, void *data)
+{
+       return !strncmp(dev_name(dev), "davinci_mdio", 12);
+}
+
 /**
  * emac_dev_open - EMAC device open
  * @ndev: The DaVinci EMAC network adapter
@@ -1484,8 +1489,14 @@ static int emac_dev_open(struct net_device *ndev)
 
        /* use the first phy on the bus if pdata did not give us a phy id */
        if (!phydev && !priv->phy_id) {
-               phy = bus_find_device_by_name(&mdio_bus_type, NULL,
-                                             "davinci_mdio");
+               /* NOTE: we can't use bus_find_device_by_name() here because
+                * the device name is not guaranteed to be 'davinci_mdio'. On
+                * some systems it can be 'davinci_mdio.0' so we need to use
+                * strncmp() against the first part of the string to correctly
+                * match it.
+                */
+               phy = bus_find_device(&mdio_bus_type, NULL, NULL,
+                                     match_first_device);
                if (phy) {
                        priv->phy_id = dev_name(phy);
                        if (!priv->phy_id || !*priv->phy_id)
index f347fd9..777fa59 100644 (file)
 static const char banner[] __initconst = KERN_INFO \
        "AX.25: bpqether driver version 004\n";
 
-static char bcast_addr[6]={0xFF,0xFF,0xFF,0xFF,0xFF,0xFF};
-
-static char bpq_eth_addr[6];
-
 static int bpq_rcv(struct sk_buff *, struct net_device *, struct packet_type *, struct net_device *);
 static int bpq_device_event(struct notifier_block *, unsigned long, void *);
 
@@ -501,8 +497,8 @@ static int bpq_new_device(struct net_device *edev)
        bpq->ethdev = edev;
        bpq->axdev = ndev;
 
-       memcpy(bpq->dest_addr, bcast_addr, sizeof(bpq_eth_addr));
-       memcpy(bpq->acpt_addr, bcast_addr, sizeof(bpq_eth_addr));
+       eth_broadcast_addr(bpq->dest_addr);
+       eth_broadcast_addr(bpq->acpt_addr);
 
        err = register_netdevice(ndev);
        if (err)
index 4377c26..d02f0a7 100644 (file)
@@ -693,6 +693,7 @@ void ipvlan_link_setup(struct net_device *dev)
 {
        ether_setup(dev);
 
+       dev->max_mtu = ETH_MAX_MTU;
        dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
        dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
        dev->netdev_ops = &ipvlan_netdev_ops;
index 83f7420..4f390fa 100644 (file)
@@ -527,7 +527,7 @@ static int net_failover_slave_register(struct net_device *slave_dev,
 
        netif_addr_lock_bh(failover_dev);
        dev_uc_sync_multiple(slave_dev, failover_dev);
-       dev_uc_sync_multiple(slave_dev, failover_dev);
+       dev_mc_sync_multiple(slave_dev, failover_dev);
        netif_addr_unlock_bh(failover_dev);
 
        err = vlan_vids_add_by_dev(slave_dev, failover_dev);
index b0e8b96..1eaec64 100644 (file)
@@ -967,8 +967,7 @@ void cdc_ncm_unbind(struct usbnet *dev, struct usb_interface *intf)
 
        atomic_set(&ctx->stop, 1);
 
-       if (hrtimer_active(&ctx->tx_timer))
-               hrtimer_cancel(&ctx->tx_timer);
+       hrtimer_cancel(&ctx->tx_timer);
 
        tasklet_kill(&ctx->bh);
 
index 21710a7..46df030 100644 (file)
@@ -1808,6 +1808,7 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
                u32 max_segments =
                        (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
 
+               max_segments = min_not_zero(max_segments, ctrl->max_segments);
                blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
                blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
        }
index b528a2f..41d45a1 100644 (file)
@@ -2790,6 +2790,9 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
        /* re-enable the admin_q so anything new can fast fail */
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
+       /* resume the io queues so that things will fast fail */
+       nvme_start_queues(&ctrl->ctrl);
+
        nvme_fc_ctlr_inactive_on_rport(ctrl);
 }
 
@@ -2804,9 +2807,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
         * waiting for io to terminate
         */
        nvme_fc_delete_association(ctrl);
-
-       /* resume the io queues so that things will fast fail */
-       nvme_start_queues(nctrl);
 }
 
 static void
index 231807c..0c4a33d 100644 (file)
@@ -170,6 +170,7 @@ struct nvme_ctrl {
        u64 cap;
        u32 page_size;
        u32 max_hw_sectors;
+       u32 max_segments;
        u16 oncs;
        u16 oacs;
        u16 nssa;
index fc33804..ba943f2 100644 (file)
 
 #define SGES_PER_PAGE  (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
 
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ 4096
+#define NVME_MAX_SEGS  127
+
 static int use_threaded_interrupts;
 module_param(use_threaded_interrupts, int, 0);
 
@@ -100,6 +107,8 @@ struct nvme_dev {
        struct nvme_ctrl ctrl;
        struct completion ioq_wait;
 
+       mempool_t *iod_mempool;
+
        /* shadow doorbell buffer support: */
        u32 *dbbuf_dbs;
        dma_addr_t dbbuf_dbs_dma_addr;
@@ -477,10 +486,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
        iod->use_sgl = nvme_pci_use_sgls(dev, rq);
 
        if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-               size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
-                               iod->use_sgl);
-
-               iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+               iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
                if (!iod->sg)
                        return BLK_STS_RESOURCE;
        } else {
@@ -526,7 +532,7 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
        }
 
        if (iod->sg != iod->inline_sg)
-               kfree(iod->sg);
+               mempool_free(iod->sg, dev->iod_mempool);
 }
 
 #ifdef CONFIG_BLK_DEV_INTEGRITY
@@ -2280,6 +2286,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
                blk_put_queue(dev->ctrl.admin_q);
        kfree(dev->queues);
        free_opal_dev(dev->ctrl.opal_dev);
+       mempool_destroy(dev->iod_mempool);
        kfree(dev);
 }
 
@@ -2289,6 +2296,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
 
        nvme_get_ctrl(&dev->ctrl);
        nvme_dev_disable(dev, false);
+       nvme_kill_queues(&dev->ctrl);
        if (!queue_work(nvme_wq, &dev->remove_work))
                nvme_put_ctrl(&dev->ctrl);
 }
@@ -2333,6 +2341,13 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
+       /*
+        * Limit the max command size to prevent iod->sg allocations going
+        * over a single page.
+        */
+       dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+       dev->ctrl.max_segments = NVME_MAX_SEGS;
+
        result = nvme_init_identify(&dev->ctrl);
        if (result)
                goto out;
@@ -2405,7 +2420,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
        struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
-       nvme_kill_queues(&dev->ctrl);
        if (pci_get_drvdata(pdev))
                device_release_driver(&pdev->dev);
        nvme_put_ctrl(&dev->ctrl);
@@ -2509,6 +2523,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        int node, result = -ENOMEM;
        struct nvme_dev *dev;
        unsigned long quirks = id->driver_data;
+       size_t alloc_size;
 
        node = dev_to_node(&pdev->dev);
        if (node == NUMA_NO_NODE)
@@ -2546,6 +2561,23 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (result)
                goto release_pools;
 
+       /*
+        * Double check that our mempool alloc size will cover the biggest
+        * command we support.
+        */
+       alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+                                               NVME_MAX_SEGS, true);
+       WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+       dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+                                               mempool_kfree,
+                                               (void *) alloc_size,
+                                               GFP_KERNEL, node);
+       if (!dev->iod_mempool) {
+               result = -ENOMEM;
+               goto release_pools;
+       }
+
        dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
 
        nvme_get_ctrl(&dev->ctrl);
index c9424da..9544625 100644 (file)
@@ -560,12 +560,6 @@ static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
        if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
                return;
 
-       if (nvme_rdma_queue_idx(queue) == 0) {
-               nvme_rdma_free_qe(queue->device->dev,
-                       &queue->ctrl->async_event_sqe,
-                       sizeof(struct nvme_command), DMA_TO_DEVICE);
-       }
-
        nvme_rdma_destroy_queue_ib(queue);
        rdma_destroy_id(queue->cm_id);
 }
@@ -698,7 +692,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                set = &ctrl->tag_set;
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_mq_ops;
-               set->queue_depth = nctrl->opts->queue_size;
+               set->queue_depth = nctrl->sqsize + 1;
                set->reserved_tags = 1; /* fabric connect */
                set->numa_node = NUMA_NO_NODE;
                set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -734,11 +728,12 @@ out:
 static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
-       nvme_rdma_stop_queue(&ctrl->queues[0]);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.admin_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
        }
+       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+               sizeof(struct nvme_command), DMA_TO_DEVICE);
        nvme_rdma_free_queue(&ctrl->queues[0]);
 }
 
@@ -755,11 +750,16 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
        ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
+       error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+                       sizeof(struct nvme_command), DMA_TO_DEVICE);
+       if (error)
+               goto out_free_queue;
+
        if (new) {
                ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
                if (IS_ERR(ctrl->ctrl.admin_tagset)) {
                        error = PTR_ERR(ctrl->ctrl.admin_tagset);
-                       goto out_free_queue;
+                       goto out_free_async_qe;
                }
 
                ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
@@ -795,12 +795,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
        if (error)
                goto out_stop_queue;
 
-       error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev,
-                       &ctrl->async_event_sqe, sizeof(struct nvme_command),
-                       DMA_TO_DEVICE);
-       if (error)
-               goto out_stop_queue;
-
        return 0;
 
 out_stop_queue:
@@ -811,6 +805,9 @@ out_cleanup_queue:
 out_free_tagset:
        if (new)
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
+out_free_async_qe:
+       nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe,
+               sizeof(struct nvme_command), DMA_TO_DEVICE);
 out_free_queue:
        nvme_rdma_free_queue(&ctrl->queues[0]);
        return error;
@@ -819,7 +816,6 @@ out_free_queue:
 static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
                bool remove)
 {
-       nvme_rdma_stop_io_queues(ctrl);
        if (remove) {
                blk_cleanup_queue(ctrl->ctrl.connect_q);
                nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
@@ -888,9 +884,9 @@ static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
        list_del(&ctrl->list);
        mutex_unlock(&nvme_rdma_ctrl_mutex);
 
-       kfree(ctrl->queues);
        nvmf_free_options(nctrl->opts);
 free_ctrl:
+       kfree(ctrl->queues);
        kfree(ctrl);
 }
 
@@ -949,6 +945,7 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
        return;
 
 destroy_admin:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, false);
 requeue:
        dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
@@ -965,12 +962,14 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work)
 
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, false);
        }
 
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        nvme_rdma_destroy_admin_queue(ctrl, false);
@@ -1736,6 +1735,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
 {
        if (ctrl->ctrl.queue_count > 1) {
                nvme_stop_queues(&ctrl->ctrl);
+               nvme_rdma_stop_io_queues(ctrl);
                blk_mq_tagset_busy_iter(&ctrl->tag_set,
                                        nvme_cancel_request, &ctrl->ctrl);
                nvme_rdma_destroy_io_queues(ctrl, shutdown);
@@ -1747,6 +1747,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
                nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap);
 
        blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
                                nvme_cancel_request, &ctrl->ctrl);
        blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
@@ -1932,11 +1933,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                goto out_free_ctrl;
        }
 
-       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
-                               0 /* no quirks, we're perfect! */);
-       if (ret)
-               goto out_free_ctrl;
-
        INIT_DELAYED_WORK(&ctrl->reconnect_work,
                        nvme_rdma_reconnect_ctrl_work);
        INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
@@ -1950,14 +1946,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),
                                GFP_KERNEL);
        if (!ctrl->queues)
-               goto out_uninit_ctrl;
+               goto out_free_ctrl;
+
+       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
+                               0 /* no quirks, we're perfect! */);
+       if (ret)
+               goto out_kfree_queues;
 
        changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
        WARN_ON_ONCE(!changed);
 
        ret = nvme_rdma_configure_admin_queue(ctrl, true);
        if (ret)
-               goto out_kfree_queues;
+               goto out_uninit_ctrl;
 
        /* sanity check icdoff */
        if (ctrl->ctrl.icdoff) {
@@ -1974,20 +1975,19 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
                goto out_remove_admin_queue;
        }
 
-       if (opts->queue_size > ctrl->ctrl.maxcmd) {
-               /* warn if maxcmd is lower than queue_size */
-               dev_warn(ctrl->ctrl.device,
-                       "queue_size %zu > ctrl maxcmd %u, clamping down\n",
-                       opts->queue_size, ctrl->ctrl.maxcmd);
-               opts->queue_size = ctrl->ctrl.maxcmd;
-       }
-
+       /* only warn if argument is too large here, will clamp later */
        if (opts->queue_size > ctrl->ctrl.sqsize + 1) {
-               /* warn if sqsize is lower than queue_size */
                dev_warn(ctrl->ctrl.device,
                        "queue_size %zu > ctrl sqsize %u, clamping down\n",
                        opts->queue_size, ctrl->ctrl.sqsize + 1);
-               opts->queue_size = ctrl->ctrl.sqsize + 1;
+       }
+
+       /* warn if maxcmd is lower than sqsize+1 */
+       if (ctrl->ctrl.sqsize + 1 > ctrl->ctrl.maxcmd) {
+               dev_warn(ctrl->ctrl.device,
+                       "sqsize %u > ctrl maxcmd %u, clamping down\n",
+                       ctrl->ctrl.sqsize + 1, ctrl->ctrl.maxcmd);
+               ctrl->ctrl.sqsize = ctrl->ctrl.maxcmd - 1;
        }
 
        if (opts->nr_io_queues) {
@@ -2013,15 +2013,16 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
        return &ctrl->ctrl;
 
 out_remove_admin_queue:
+       nvme_rdma_stop_queue(&ctrl->queues[0]);
        nvme_rdma_destroy_admin_queue(ctrl, true);
-out_kfree_queues:
-       kfree(ctrl->queues);
 out_uninit_ctrl:
        nvme_uninit_ctrl(&ctrl->ctrl);
        nvme_put_ctrl(&ctrl->ctrl);
        if (ret > 0)
                ret = -EIO;
        return ERR_PTR(ret);
+out_kfree_queues:
+       kfree(ctrl->queues);
 out_free_ctrl:
        kfree(ctrl);
        return ERR_PTR(ret);
index a03da76..74d4b78 100644 (file)
@@ -686,6 +686,14 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
        }
 
        ctrl->csts = NVME_CSTS_RDY;
+
+       /*
+        * Controllers that are not yet enabled should not really enforce the
+        * keep alive timeout, but we still want to track a timeout and cleanup
+        * in case a host died before it enabled the controller.  Hence, simply
+        * reset the keep alive timer when the controller is enabled.
+        */
+       mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
index ab2f3fe..31ff03d 100644 (file)
@@ -598,7 +598,7 @@ static int _generic_set_opp_regulator(const struct opp_table *opp_table,
        }
 
        /* Scaling up? Scale voltage before frequency */
-       if (freq > old_freq) {
+       if (freq >= old_freq) {
                ret = _set_opp_voltage(dev, reg, new_supply);
                if (ret)
                        goto restore_voltage;
index 76243ca..b5c880b 100644 (file)
@@ -333,7 +333,7 @@ static int owl_pin_config_set(struct pinctrl_dev *pctrldev,
        unsigned long flags;
        unsigned int param;
        u32 reg, bit, width, arg;
-       int ret, i;
+       int ret = 0, i;
 
        info = &pctrl->soc->padinfo[pin];
 
index b601039..c4aa411 100644 (file)
@@ -101,10 +101,11 @@ struct pinctrl_dev *of_pinctrl_get(struct device_node *np)
 }
 
 static int dt_to_map_one_config(struct pinctrl *p,
-                               struct pinctrl_dev *pctldev,
+                               struct pinctrl_dev *hog_pctldev,
                                const char *statename,
                                struct device_node *np_config)
 {
+       struct pinctrl_dev *pctldev = NULL;
        struct device_node *np_pctldev;
        const struct pinctrl_ops *ops;
        int ret;
@@ -123,8 +124,10 @@ static int dt_to_map_one_config(struct pinctrl *p,
                        return -EPROBE_DEFER;
                }
                /* If we're creating a hog we can use the passed pctldev */
-               if (pctldev && (np_pctldev == p->dev->of_node))
+               if (hog_pctldev && (np_pctldev == p->dev->of_node)) {
+                       pctldev = hog_pctldev;
                        break;
+               }
                pctldev = get_pinctrl_dev_from_of_node(np_pctldev);
                if (pctldev)
                        break;
index ad6da11..e3f1ab2 100644 (file)
@@ -1459,6 +1459,9 @@ static int mtk_gpio_to_irq(struct gpio_chip *chip, unsigned int offset)
        struct mtk_pinctrl *hw = gpiochip_get_data(chip);
        unsigned long eint_n;
 
+       if (!hw->eint)
+               return -ENOTSUPP;
+
        eint_n = offset;
 
        return mtk_eint_find_irq(hw->eint, eint_n);
@@ -1471,7 +1474,8 @@ static int mtk_gpio_set_config(struct gpio_chip *chip, unsigned int offset,
        unsigned long eint_n;
        u32 debounce;
 
-       if (pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
+       if (!hw->eint ||
+           pinconf_to_config_param(config) != PIN_CONFIG_INPUT_DEBOUNCE)
                return -ENOTSUPP;
 
        debounce = pinconf_to_config_argument(config);
index b379969..16ff56f 100644 (file)
@@ -1000,11 +1000,6 @@ static int mtk_eint_init(struct mtk_pinctrl *pctl, struct platform_device *pdev)
                return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Unable to get eint resource\n");
-               return -ENODEV;
-       }
-
        pctl->eint->base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(pctl->eint->base))
                return PTR_ERR(pctl->eint->base);
index b3153c0..e5647da 100644 (file)
@@ -1590,8 +1590,11 @@ static int pcs_save_context(struct pcs_device *pcs)
 
        mux_bytes = pcs->width / BITS_PER_BYTE;
 
-       if (!pcs->saved_vals)
+       if (!pcs->saved_vals) {
                pcs->saved_vals = devm_kzalloc(pcs->dev, pcs->size, GFP_ATOMIC);
+               if (!pcs->saved_vals)
+                       return -ENOMEM;
+       }
 
        switch (pcs->width) {
        case 64:
@@ -1651,8 +1654,13 @@ static int pinctrl_single_suspend(struct platform_device *pdev,
        if (!pcs)
                return -EINVAL;
 
-       if (pcs->flags & PCS_CONTEXT_LOSS_OFF)
-               pcs_save_context(pcs);
+       if (pcs->flags & PCS_CONTEXT_LOSS_OFF) {
+               int ret;
+
+               ret = pcs_save_context(pcs);
+               if (ret < 0)
+                       return ret;
+       }
 
        return pinctrl_force_sleep(pcs->pctl);
 }
index 767c485..547dbda 100644 (file)
@@ -221,7 +221,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
                }
                pct = &sysoff->ts[0];
                for (i = 0; i < sysoff->n_samples; i++) {
-                       getnstimeofday64(&ts);
+                       ktime_get_real_ts64(&ts);
                        pct->sec = ts.tv_sec;
                        pct->nsec = ts.tv_nsec;
                        pct++;
@@ -230,7 +230,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
                        pct->nsec = ts.tv_nsec;
                        pct++;
                }
-               getnstimeofday64(&ts);
+               ktime_get_real_ts64(&ts);
                pct->sec = ts.tv_sec;
                pct->nsec = ts.tv_nsec;
                if (copy_to_user((void __user *)arg, sysoff, sizeof(*sysoff)))
index 1468a16..e8652c1 100644 (file)
@@ -374,7 +374,7 @@ static int qoriq_ptp_probe(struct platform_device *dev)
                pr_err("ioremap ptp registers failed\n");
                goto no_ioremap;
        }
-       getnstimeofday64(&now);
+       ktime_get_real_ts64(&now);
        ptp_qoriq_settime(&qoriq_ptp->caps, &now);
 
        tmr_ctrl =
index 73cce3e..d3a38c4 100644 (file)
@@ -1222,80 +1222,37 @@ static void dasd_hosts_init(struct dentry *base_dentry,
                device->hosts_dentry = pde;
 }
 
-/*
- * Allocate memory for a channel program with 'cplength' channel
- * command words and 'datasize' additional space. There are two
- * variantes: 1) dasd_kmalloc_request uses kmalloc to get the needed
- * memory and 2) dasd_smalloc_request uses the static ccw memory
- * that gets allocated for each device.
- */
-struct dasd_ccw_req *dasd_kmalloc_request(int magic, int cplength,
-                                         int datasize,
-                                         struct dasd_device *device)
-{
-       struct dasd_ccw_req *cqr;
-
-       /* Sanity checks */
-       BUG_ON(datasize > PAGE_SIZE ||
-            (cplength*sizeof(struct ccw1)) > PAGE_SIZE);
-
-       cqr = kzalloc(sizeof(struct dasd_ccw_req), GFP_ATOMIC);
-       if (cqr == NULL)
-               return ERR_PTR(-ENOMEM);
-       cqr->cpaddr = NULL;
-       if (cplength > 0) {
-               cqr->cpaddr = kcalloc(cplength, sizeof(struct ccw1),
-                                     GFP_ATOMIC | GFP_DMA);
-               if (cqr->cpaddr == NULL) {
-                       kfree(cqr);
-                       return ERR_PTR(-ENOMEM);
-               }
-       }
-       cqr->data = NULL;
-       if (datasize > 0) {
-               cqr->data = kzalloc(datasize, GFP_ATOMIC | GFP_DMA);
-               if (cqr->data == NULL) {
-                       kfree(cqr->cpaddr);
-                       kfree(cqr);
-                       return ERR_PTR(-ENOMEM);
-               }
-       }
-       cqr->magic =  magic;
-       set_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
-       dasd_get_device(device);
-       return cqr;
-}
-EXPORT_SYMBOL(dasd_kmalloc_request);
-
-struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength,
-                                         int datasize,
-                                         struct dasd_device *device)
+struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength, int datasize,
+                                         struct dasd_device *device,
+                                         struct dasd_ccw_req *cqr)
 {
        unsigned long flags;
-       struct dasd_ccw_req *cqr;
-       char *data;
-       int size;
+       char *data, *chunk;
+       int size = 0;
 
-       size = (sizeof(struct dasd_ccw_req) + 7L) & -8L;
        if (cplength > 0)
                size += cplength * sizeof(struct ccw1);
        if (datasize > 0)
                size += datasize;
+       if (!cqr)
+               size += (sizeof(*cqr) + 7L) & -8L;
+
        spin_lock_irqsave(&device->mem_lock, flags);
-       cqr = (struct dasd_ccw_req *)
-               dasd_alloc_chunk(&device->ccw_chunks, size);
+       data = chunk = dasd_alloc_chunk(&device->ccw_chunks, size);
        spin_unlock_irqrestore(&device->mem_lock, flags);
-       if (cqr == NULL)
+       if (!chunk)
                return ERR_PTR(-ENOMEM);
-       memset(cqr, 0, sizeof(struct dasd_ccw_req));
-       data = (char *) cqr + ((sizeof(struct dasd_ccw_req) + 7L) & -8L);
-       cqr->cpaddr = NULL;
+       if (!cqr) {
+               cqr = (void *) data;
+               data += (sizeof(*cqr) + 7L) & -8L;
+       }
+       memset(cqr, 0, sizeof(*cqr));
+       cqr->mem_chunk = chunk;
        if (cplength > 0) {
-               cqr->cpaddr = (struct ccw1 *) data;
-               data += cplength*sizeof(struct ccw1);
-               memset(cqr->cpaddr, 0, cplength*sizeof(struct ccw1));
+               cqr->cpaddr = data;
+               data += cplength * sizeof(struct ccw1);
+               memset(cqr->cpaddr, 0, cplength * sizeof(struct ccw1));
        }
-       cqr->data = NULL;
        if (datasize > 0) {
                cqr->data = data;
                memset(cqr->data, 0, datasize);
@@ -1307,33 +1264,12 @@ struct dasd_ccw_req *dasd_smalloc_request(int magic, int cplength,
 }
 EXPORT_SYMBOL(dasd_smalloc_request);
 
-/*
- * Free memory of a channel program. This function needs to free all the
- * idal lists that might have been created by dasd_set_cda and the
- * struct dasd_ccw_req itself.
- */
-void dasd_kfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
-{
-       struct ccw1 *ccw;
-
-       /* Clear any idals used for the request. */
-       ccw = cqr->cpaddr;
-       do {
-               clear_normalized_cda(ccw);
-       } while (ccw++->flags & (CCW_FLAG_CC | CCW_FLAG_DC));
-       kfree(cqr->cpaddr);
-       kfree(cqr->data);
-       kfree(cqr);
-       dasd_put_device(device);
-}
-EXPORT_SYMBOL(dasd_kfree_request);
-
 void dasd_sfree_request(struct dasd_ccw_req *cqr, struct dasd_device *device)
 {
        unsigned long flags;
 
        spin_lock_irqsave(&device->mem_lock, flags);
-       dasd_free_chunk(&device->ccw_chunks, cqr);
+       dasd_free_chunk(&device->ccw_chunks, cqr->mem_chunk);
        spin_unlock_irqrestore(&device->mem_lock, flags);
        dasd_put_device(device);
 }
@@ -1885,6 +1821,33 @@ static void __dasd_device_process_ccw_queue(struct dasd_device *device,
        }
 }
 
+static void __dasd_process_cqr(struct dasd_device *device,
+                              struct dasd_ccw_req *cqr)
+{
+       char errorstring[ERRORLENGTH];
+
+       switch (cqr->status) {
+       case DASD_CQR_SUCCESS:
+               cqr->status = DASD_CQR_DONE;
+               break;
+       case DASD_CQR_ERROR:
+               cqr->status = DASD_CQR_NEED_ERP;
+               break;
+       case DASD_CQR_CLEARED:
+               cqr->status = DASD_CQR_TERMINATED;
+               break;
+       default:
+               /* internal error 12 - wrong cqr status*/
+               snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status);
+               dev_err(&device->cdev->dev,
+                       "An error occurred in the DASD device driver, "
+                       "reason=%s\n", errorstring);
+               BUG();
+       }
+       if (cqr->callback)
+               cqr->callback(cqr, cqr->callback_data);
+}
+
 /*
  * the cqrs from the final queue are returned to the upper layer
  * by setting a dasd_block state and calling the callback function
@@ -1895,40 +1858,18 @@ static void __dasd_device_process_final_queue(struct dasd_device *device,
        struct list_head *l, *n;
        struct dasd_ccw_req *cqr;
        struct dasd_block *block;
-       void (*callback)(struct dasd_ccw_req *, void *data);
-       void *callback_data;
-       char errorstring[ERRORLENGTH];
 
        list_for_each_safe(l, n, final_queue) {
                cqr = list_entry(l, struct dasd_ccw_req, devlist);
                list_del_init(&cqr->devlist);
                block = cqr->block;
-               callback = cqr->callback;
-               callback_data = cqr->callback_data;
-               if (block)
+               if (!block) {
+                       __dasd_process_cqr(device, cqr);
+               } else {
                        spin_lock_bh(&block->queue_lock);
-               switch (cqr->status) {
-               case DASD_CQR_SUCCESS:
-                       cqr->status = DASD_CQR_DONE;
-                       break;
-               case DASD_CQR_ERROR:
-                       cqr->status = DASD_CQR_NEED_ERP;
-                       break;
-               case DASD_CQR_CLEARED:
-                       cqr->status = DASD_CQR_TERMINATED;
-                       break;
-               default:
-                       /* internal error 12 - wrong cqr status*/
-                       snprintf(errorstring, ERRORLENGTH, "12 %p %x02", cqr, cqr->status);
-                       dev_err(&device->cdev->dev,
-                               "An error occurred in the DASD device driver, "
-                               "reason=%s\n", errorstring);
-                       BUG();
-               }
-               if (cqr->callback != NULL)
-                       (callback)(cqr, callback_data);
-               if (block)
+                       __dasd_process_cqr(device, cqr);
                        spin_unlock_bh(&block->queue_lock);
+               }
        }
 }
 
@@ -3041,7 +2982,6 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx,
        cqr->callback_data = req;
        cqr->status = DASD_CQR_FILLED;
        cqr->dq = dq;
-       *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req)) = cqr;
 
        blk_mq_start_request(req);
        spin_lock(&block->queue_lock);
@@ -3072,7 +3012,7 @@ enum blk_eh_timer_return dasd_times_out(struct request *req, bool reserved)
        unsigned long flags;
        int rc = 0;
 
-       cqr = *((struct dasd_ccw_req **) blk_mq_rq_to_pdu(req));
+       cqr = blk_mq_rq_to_pdu(req);
        if (!cqr)
                return BLK_EH_DONE;
 
@@ -3174,7 +3114,7 @@ static int dasd_alloc_queue(struct dasd_block *block)
        int rc;
 
        block->tag_set.ops = &dasd_mq_ops;
-       block->tag_set.cmd_size = sizeof(struct dasd_ccw_req *);
+       block->tag_set.cmd_size = sizeof(struct dasd_ccw_req);
        block->tag_set.nr_hw_queues = DASD_NR_HW_QUEUES;
        block->tag_set.queue_depth = DASD_MAX_LCU_DEV * DASD_REQ_PER_DEV;
        block->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
@@ -4038,7 +3978,8 @@ static struct dasd_ccw_req *dasd_generic_build_rdc(struct dasd_device *device,
        struct ccw1 *ccw;
        unsigned long *idaw;
 
-       cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device);
+       cqr = dasd_smalloc_request(magic, 1 /* RDC */, rdc_buffer_size, device,
+                                  NULL);
 
        if (IS_ERR(cqr)) {
                /* internal error 13 - Allocating the RDC request failed*/
index 5e963fe..e36a114 100644 (file)
@@ -407,9 +407,9 @@ static int read_unit_address_configuration(struct dasd_device *device,
        int rc;
        unsigned long flags;
 
-       cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
                                   (sizeof(struct dasd_psf_prssd_data)),
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr))
                return PTR_ERR(cqr);
        cqr->startdev = device;
@@ -457,7 +457,7 @@ static int read_unit_address_configuration(struct dasd_device *device,
                lcu->flags |= NEED_UAC_UPDATE;
                spin_unlock_irqrestore(&lcu->lock, flags);
        }
-       dasd_kfree_request(cqr, cqr->memdev);
+       dasd_sfree_request(cqr, cqr->memdev);
        return rc;
 }
 
index 131f198..e1fe024 100644 (file)
@@ -536,7 +536,8 @@ static struct dasd_ccw_req *dasd_diag_build_cp(struct dasd_device *memdev,
        /* Build the request */
        datasize = sizeof(struct dasd_diag_req) +
                count*sizeof(struct dasd_diag_bio);
-       cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev);
+       cqr = dasd_smalloc_request(DASD_DIAG_MAGIC, 0, datasize, memdev,
+                                  blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
 
index be208e7..bbf95b7 100644 (file)
@@ -886,7 +886,7 @@ static int dasd_eckd_read_conf_lpm(struct dasd_device *device,
        }
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* RCD */,
                                   0, /* use rcd_buf as data ara */
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
                              "Could not allocate RCD request");
@@ -1442,7 +1442,7 @@ static int dasd_eckd_read_features(struct dasd_device *device)
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
                                   (sizeof(struct dasd_psf_prssd_data) +
                                    sizeof(struct dasd_rssd_features)),
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr)) {
                DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s", "Could not "
                                "allocate initialization request");
@@ -1504,7 +1504,7 @@ static struct dasd_ccw_req *dasd_eckd_build_psf_ssc(struct dasd_device *device,
 
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ ,
                                  sizeof(struct dasd_psf_ssc_data),
-                                 device);
+                                  device, NULL);
 
        if (IS_ERR(cqr)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
@@ -1815,7 +1815,8 @@ dasd_eckd_analysis_ccw(struct dasd_device *device)
 
        cplength = 8;
        datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data);
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device,
+                                  NULL);
        if (IS_ERR(cqr))
                return cqr;
        ccw = cqr->cpaddr;
@@ -2092,7 +2093,8 @@ dasd_eckd_build_check_tcw(struct dasd_device *base, struct format_data_t *fdata,
         */
        itcw_size = itcw_calc_size(0, count, 0);
 
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev,
+                                  NULL);
        if (IS_ERR(cqr))
                return cqr;
 
@@ -2186,7 +2188,7 @@ dasd_eckd_build_check(struct dasd_device *base, struct format_data_t *fdata,
        cplength += count;
 
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
-                                 startdev);
+                                  startdev, NULL);
        if (IS_ERR(cqr))
                return cqr;
 
@@ -2332,7 +2334,7 @@ dasd_eckd_build_format(struct dasd_device *base,
        }
        /* Allocate the format ccw request. */
        fcp = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength,
-                                  datasize, startdev);
+                                  datasize, startdev, NULL);
        if (IS_ERR(fcp))
                return fcp;
 
@@ -3103,7 +3105,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single(
        }
        /* Allocate the ccw request. */
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
-                                  startdev);
+                                  startdev, blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
        ccw = cqr->cpaddr;
@@ -3262,7 +3264,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_track(
 
        /* Allocate the ccw request. */
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize,
-                                  startdev);
+                                  startdev, blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
        ccw = cqr->cpaddr;
@@ -3595,7 +3597,8 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_tpm_track(
 
        /* Allocate the ccw request. */
        itcw_size = itcw_calc_size(0, ctidaw, 0);
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 0, itcw_size, startdev,
+                                  blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
 
@@ -3862,7 +3865,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_raw(struct dasd_device *startdev,
 
        /* Allocate the ccw request. */
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength,
-                                  datasize, startdev);
+                                  datasize, startdev, blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
 
@@ -4102,7 +4105,7 @@ dasd_eckd_release(struct dasd_device *device)
                return -EACCES;
 
        useglobal = 0;
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL);
        if (IS_ERR(cqr)) {
                mutex_lock(&dasd_reserve_mutex);
                useglobal = 1;
@@ -4157,7 +4160,7 @@ dasd_eckd_reserve(struct dasd_device *device)
                return -EACCES;
 
        useglobal = 0;
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL);
        if (IS_ERR(cqr)) {
                mutex_lock(&dasd_reserve_mutex);
                useglobal = 1;
@@ -4211,7 +4214,7 @@ dasd_eckd_steal_lock(struct dasd_device *device)
                return -EACCES;
 
        useglobal = 0;
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1, 32, device, NULL);
        if (IS_ERR(cqr)) {
                mutex_lock(&dasd_reserve_mutex);
                useglobal = 1;
@@ -4271,7 +4274,8 @@ static int dasd_eckd_snid(struct dasd_device *device,
 
        useglobal = 0;
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1,
-                                  sizeof(struct dasd_snid_data), device);
+                                  sizeof(struct dasd_snid_data), device,
+                                  NULL);
        if (IS_ERR(cqr)) {
                mutex_lock(&dasd_reserve_mutex);
                useglobal = 1;
@@ -4331,7 +4335,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp)
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */  + 1 /* RSSD */,
                                   (sizeof(struct dasd_psf_prssd_data) +
                                    sizeof(struct dasd_rssd_perf_stats_t)),
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
                            "Could not allocate initialization request");
@@ -4477,7 +4481,7 @@ static int dasd_symm_io(struct dasd_device *device, void __user *argp)
        psf1 = psf_data[1];
 
        /* setup CCWs for PSF + RSSD */
-       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2 , 0, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 2, 0, device, NULL);
        if (IS_ERR(cqr)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
                        "Could not allocate initialization request");
@@ -5037,7 +5041,7 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device,
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
                                   (sizeof(struct dasd_psf_prssd_data) +
                                    sizeof(struct dasd_rssd_messages)),
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr)) {
                DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s",
                                "Could not allocate read message buffer request");
@@ -5126,7 +5130,7 @@ static int dasd_eckd_query_host_access(struct dasd_device *device,
 
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */,
                                   sizeof(struct dasd_psf_prssd_data) + 1,
-                                  device);
+                                  device, NULL);
        if (IS_ERR(cqr)) {
                DBF_EVENT_DEVID(DBF_WARNING, device->cdev, "%s",
                                "Could not allocate read message buffer request");
@@ -5284,8 +5288,8 @@ dasd_eckd_psf_cuir_response(struct dasd_device *device, int response,
        int rc;
 
        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ ,
-                                 sizeof(struct dasd_psf_cuir_response),
-                                 device);
+                                  sizeof(struct dasd_psf_cuir_response),
+                                  device, NULL);
 
        if (IS_ERR(cqr)) {
                DBF_DEV_EVENT(DBF_WARNING, device, "%s",
index 0af8c52..6ef8714 100644 (file)
@@ -447,7 +447,7 @@ static void dasd_eer_snss_cb(struct dasd_ccw_req *cqr, void *data)
                 * is a new ccw in device->eer_cqr. Free the "old"
                 * snss request now.
                 */
-               dasd_kfree_request(cqr, device);
+               dasd_sfree_request(cqr, device);
 }
 
 /*
@@ -472,8 +472,8 @@ int dasd_eer_enable(struct dasd_device *device)
        if (rc)
                goto out;
 
-       cqr = dasd_kmalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
-                                  SNSS_DATA_SIZE, device);
+       cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
+                                  SNSS_DATA_SIZE, device, NULL);
        if (IS_ERR(cqr)) {
                rc = -ENOMEM;
                cqr = NULL;
@@ -505,7 +505,7 @@ out:
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
 
        if (cqr)
-               dasd_kfree_request(cqr, device);
+               dasd_sfree_request(cqr, device);
 
        return rc;
 }
@@ -528,7 +528,7 @@ void dasd_eer_disable(struct dasd_device *device)
        in_use = test_and_clear_bit(DASD_FLAG_EER_IN_USE, &device->flags);
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
        if (cqr && !in_use)
-               dasd_kfree_request(cqr, device);
+               dasd_sfree_request(cqr, device);
 }
 
 /*
index a6b132f..56007a3 100644 (file)
@@ -356,7 +356,8 @@ static struct dasd_ccw_req *dasd_fba_build_cp_discard(
        datasize = sizeof(struct DE_fba_data) +
                nr_ccws * (sizeof(struct LO_fba_data) + sizeof(struct ccw1));
 
-       cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
+       cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev,
+                                  blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
 
@@ -490,7 +491,8 @@ static struct dasd_ccw_req *dasd_fba_build_cp_regular(
                datasize += (count - 1)*sizeof(struct LO_fba_data);
        }
        /* Allocate the ccw request. */
-       cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev);
+       cqr = dasd_smalloc_request(DASD_FBA_MAGIC, cplength, datasize, memdev,
+                                  blk_mq_rq_to_pdu(req));
        if (IS_ERR(cqr))
                return cqr;
        ccw = cqr->cpaddr;
index 96709b1..976b6bd 100644 (file)
@@ -158,40 +158,33 @@ do { \
 
 struct dasd_ccw_req {
        unsigned int magic;             /* Eye catcher */
+       int intrc;                      /* internal error, e.g. from start_IO */
        struct list_head devlist;       /* for dasd_device request queue */
        struct list_head blocklist;     /* for dasd_block request queue */
-
-       /* Where to execute what... */
        struct dasd_block *block;       /* the originating block device */
        struct dasd_device *memdev;     /* the device used to allocate this */
        struct dasd_device *startdev;   /* device the request is started on */
        struct dasd_device *basedev;    /* base device if no block->base */
        void *cpaddr;                   /* address of ccw or tcw */
+       short retries;                  /* A retry counter */
        unsigned char cpmode;           /* 0 = cmd mode, 1 = itcw */
        char status;                    /* status of this request */
-       short retries;                  /* A retry counter */
+       char lpm;                       /* logical path mask */
        unsigned long flags;            /* flags of this request */
        struct dasd_queue *dq;
-
-       /* ... and how */
        unsigned long starttime;        /* jiffies time of request start */
        unsigned long expires;          /* expiration period in jiffies */
-       char lpm;                       /* logical path mask */
        void *data;                     /* pointer to data area */
-
-       /* these are important for recovering erroneous requests          */
-       int intrc;                      /* internal error, e.g. from start_IO */
        struct irb irb;                 /* device status in case of an error */
        struct dasd_ccw_req *refers;    /* ERP-chain queueing. */
        void *function;                 /* originating ERP action */
+       void *mem_chunk;
 
-       /* these are for statistics only */
        unsigned long buildclk;         /* TOD-clock of request generation */
        unsigned long startclk;         /* TOD-clock of request start */
        unsigned long stopclk;          /* TOD-clock of request interrupt */
        unsigned long endclk;           /* TOD-clock of request termination */
 
-        /* Callback that is called after reaching final status. */
        void (*callback)(struct dasd_ccw_req *, void *data);
        void *callback_data;
 };
@@ -714,19 +707,10 @@ extern const struct block_device_operations dasd_device_operations;
 extern struct kmem_cache *dasd_page_cache;
 
 struct dasd_ccw_req *
-dasd_kmalloc_request(int , int, int, struct dasd_device *);
-struct dasd_ccw_req *
-dasd_smalloc_request(int , int, int, struct dasd_device *);
-void dasd_kfree_request(struct dasd_ccw_req *, struct dasd_device *);
+dasd_smalloc_request(int, int, int, struct dasd_device *, struct dasd_ccw_req *);
 void dasd_sfree_request(struct dasd_ccw_req *, struct dasd_device *);
 void dasd_wakeup_cb(struct dasd_ccw_req *, void *);
 
-static inline int
-dasd_kmalloc_set_cda(struct ccw1 *ccw, void *cda, struct dasd_device *device)
-{
-       return set_normalized_cda(ccw, cda);
-}
-
 struct dasd_device *dasd_alloc_device(void);
 void dasd_free_device(struct dasd_device *);
 
index a070ef0..f230516 100644 (file)
@@ -5,6 +5,7 @@
 
 # The following is required for define_trace.h to find ./trace.h
 CFLAGS_trace.o := -I$(src)
+CFLAGS_vfio_ccw_fsm.o := -I$(src)
 
 obj-y += airq.o blacklist.o chsc.o cio.o css.o chp.o idset.o isc.o \
        fcx.o itcw.o crw.o ccwreq.o trace.o ioasm.o
index dce92b2..dbe7c7a 100644 (file)
 #define CCWCHAIN_LEN_MAX       256
 
 struct pfn_array {
+       /* Starting guest physical I/O address. */
        unsigned long           pa_iova;
+       /* Array that stores PFNs of the pages need to pin. */
        unsigned long           *pa_iova_pfn;
+       /* Array that receives PFNs of the pages pinned. */
        unsigned long           *pa_pfn;
+       /* Number of pages pinned from @pa_iova. */
        int                     pa_nr;
 };
 
@@ -46,70 +50,33 @@ struct ccwchain {
 };
 
 /*
- * pfn_array_pin() - pin user pages in memory
+ * pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory
  * @pa: pfn_array on which to perform the operation
  * @mdev: the mediated device to perform pin/unpin operations
+ * @iova: target guest physical address
+ * @len: number of bytes that should be pinned from @iova
  *
- * Attempt to pin user pages in memory.
+ * Attempt to allocate memory for PFNs, and pin user pages in memory.
  *
  * Usage of pfn_array:
- * @pa->pa_iova     starting guest physical I/O address. Assigned by caller.
- * @pa->pa_iova_pfn array that stores PFNs of the pages need to pin. Allocated
- *                  by caller.
- * @pa->pa_pfn      array that receives PFNs of the pages pinned. Allocated by
- *                  caller.
- * @pa->pa_nr       number of pages from @pa->pa_iova to pin. Assigned by
- *                  caller.
- *                  number of pages pinned. Assigned by callee.
+ * We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
+ * this structure will be filled in by this function.
  *
  * Returns:
  *   Number of pages pinned on success.
- *   If @pa->pa_nr is 0 or negative, returns 0.
+ *   If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially,
+ *   returns -EINVAL.
  *   If no pages were pinned, returns -errno.
  */
-static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
-{
-       int i, ret;
-
-       if (pa->pa_nr <= 0) {
-               pa->pa_nr = 0;
-               return 0;
-       }
-
-       pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
-       for (i = 1; i < pa->pa_nr; i++)
-               pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
-
-       ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
-                            IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
-
-       if (ret > 0 && ret != pa->pa_nr) {
-               vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
-               pa->pa_nr = 0;
-               return 0;
-       }
-
-       return ret;
-}
-
-/* Unpin the pages before releasing the memory. */
-static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
-{
-       vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
-       pa->pa_nr = 0;
-       kfree(pa->pa_iova_pfn);
-}
-
-/* Alloc memory for PFNs, then pin pages with them. */
 static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
                               u64 iova, unsigned int len)
 {
-       int ret = 0;
+       int i, ret = 0;
 
        if (!len)
                return 0;
 
-       if (pa->pa_nr)
+       if (pa->pa_nr || pa->pa_iova_pfn)
                return -EINVAL;
 
        pa->pa_iova = iova;
@@ -126,18 +93,39 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
                return -ENOMEM;
        pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
 
-       ret = pfn_array_pin(pa, mdev);
+       pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
+       for (i = 1; i < pa->pa_nr; i++)
+               pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
 
-       if (ret > 0)
-               return ret;
-       else if (!ret)
+       ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
+                            IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
+
+       if (ret < 0) {
+               goto err_out;
+       } else if (ret > 0 && ret != pa->pa_nr) {
+               vfio_unpin_pages(mdev, pa->pa_iova_pfn, ret);
                ret = -EINVAL;
+               goto err_out;
+       }
 
+       return ret;
+
+err_out:
+       pa->pa_nr = 0;
        kfree(pa->pa_iova_pfn);
+       pa->pa_iova_pfn = NULL;
 
        return ret;
 }
 
+/* Unpin the pages before releasing the memory. */
+static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
+{
+       vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
+       pa->pa_nr = 0;
+       kfree(pa->pa_iova_pfn);
+}
+
 static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
 {
        pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
@@ -365,6 +353,9 @@ static void cp_unpin_free(struct channel_program *cp)
  * This is the chain length not considering any TICs.
  * You need to do a new round for each TIC target.
  *
+ * The program is also validated for absence of not yet supported
+ * indirect data addressing scenarios.
+ *
  * Returns: the length of the ccw chain or -errno.
  */
 static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
@@ -391,6 +382,14 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
        do {
                cnt++;
 
+               /*
+                * As we don't want to fail direct addressing even if the
+                * orb specified one of the unsupported formats, we defer
+                * checking for IDAWs in unsupported formats to here.
+                */
+               if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
+                       return -EOPNOTSUPP;
+
                if ((!ccw_is_chain(ccw)) && (!ccw_is_tic(ccw)))
                        break;
 
@@ -503,7 +502,7 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
        struct ccw1 *ccw;
        struct pfn_array_table *pat;
        unsigned long *idaws;
-       int idaw_nr;
+       int ret;
 
        ccw = chain->ch_ccw + idx;
 
@@ -523,18 +522,19 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
         * needed when translating a direct ccw to a idal ccw.
         */
        pat = chain->ch_pat + idx;
-       if (pfn_array_table_init(pat, 1))
-               return -ENOMEM;
-       idaw_nr = pfn_array_alloc_pin(pat->pat_pa, cp->mdev,
-                                     ccw->cda, ccw->count);
-       if (idaw_nr < 0)
-               return idaw_nr;
+       ret = pfn_array_table_init(pat, 1);
+       if (ret)
+               goto out_init;
+
+       ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
+       if (ret < 0)
+               goto out_init;
 
        /* Translate this direct ccw to a idal ccw. */
-       idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
+       idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
        if (!idaws) {
-               pfn_array_table_unpin_free(pat, cp->mdev);
-               return -ENOMEM;
+               ret = -ENOMEM;
+               goto out_unpin;
        }
        ccw->cda = (__u32) virt_to_phys(idaws);
        ccw->flags |= CCW_FLAG_IDA;
@@ -542,6 +542,12 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
        pfn_array_table_idal_create_words(pat, idaws);
 
        return 0;
+
+out_unpin:
+       pfn_array_table_unpin_free(pat, cp->mdev);
+out_init:
+       ccw->cda = 0;
+       return ret;
 }
 
 static int ccwchain_fetch_idal(struct ccwchain *chain,
@@ -571,7 +577,7 @@ static int ccwchain_fetch_idal(struct ccwchain *chain,
        pat = chain->ch_pat + idx;
        ret = pfn_array_table_init(pat, idaw_nr);
        if (ret)
-               return ret;
+               goto out_init;
 
        /* Translate idal ccw to use new allocated idaws. */
        idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
@@ -603,6 +609,8 @@ out_free_idaws:
        kfree(idaws);
 out_unpin:
        pfn_array_table_unpin_free(pat, cp->mdev);
+out_init:
+       ccw->cda = 0;
        return ret;
 }
 
@@ -656,10 +664,8 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
        /*
         * XXX:
         * Only support prefetch enable mode now.
-        * Only support 64bit addressing idal.
-        * Only support 4k IDAW.
         */
-       if (!orb->cmd.pfch || !orb->cmd.c64 || orb->cmd.i2k)
+       if (!orb->cmd.pfch)
                return -EOPNOTSUPP;
 
        INIT_LIST_HEAD(&cp->ccwchain_list);
@@ -688,6 +694,10 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
        ret = ccwchain_loop_tic(chain, cp);
        if (ret)
                cp_unpin_free(cp);
+       /* It is safe to force: if not set but idals used
+        * ccwchain_calc_length returns an error.
+        */
+       cp->orb.cmd.c64 = 1;
 
        return ret;
 }
index ea6a2d0..770fa9c 100644 (file)
@@ -177,6 +177,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)
 {
        struct vfio_ccw_private *private = dev_get_drvdata(&sch->dev);
        unsigned long flags;
+       int rc = -EAGAIN;
 
        spin_lock_irqsave(sch->lock, flags);
        if (!device_is_registered(&sch->dev))
@@ -187,6 +188,7 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)
 
        if (cio_update_schib(sch)) {
                vfio_ccw_fsm_event(private, VFIO_CCW_EVENT_NOT_OPER);
+               rc = 0;
                goto out_unlock;
        }
 
@@ -195,11 +197,12 @@ static int vfio_ccw_sch_event(struct subchannel *sch, int process)
                private->state = private->mdev ? VFIO_CCW_STATE_IDLE :
                                 VFIO_CCW_STATE_STANDBY;
        }
+       rc = 0;
 
 out_unlock:
        spin_unlock_irqrestore(sch->lock, flags);
 
-       return 0;
+       return rc;
 }
 
 static struct css_device_id vfio_ccw_sch_ids[] = {
index 3c80064..797a827 100644 (file)
@@ -13,6 +13,9 @@
 #include "ioasm.h"
 #include "vfio_ccw_private.h"
 
+#define CREATE_TRACE_POINTS
+#include "vfio_ccw_trace.h"
+
 static int fsm_io_helper(struct vfio_ccw_private *private)
 {
        struct subchannel *sch;
@@ -110,6 +113,10 @@ static void fsm_disabled_irq(struct vfio_ccw_private *private,
         */
        cio_disable_subchannel(sch);
 }
+inline struct subchannel_id get_schid(struct vfio_ccw_private *p)
+{
+       return p->sch->schid;
+}
 
 /*
  * Deal with the ccw command request from the userspace.
@@ -121,6 +128,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
        union scsw *scsw = &private->scsw;
        struct ccw_io_region *io_region = &private->io_region;
        struct mdev_device *mdev = private->mdev;
+       char *errstr = "request";
 
        private->state = VFIO_CCW_STATE_BOXED;
 
@@ -132,15 +140,19 @@ static void fsm_io_request(struct vfio_ccw_private *private,
                /* Don't try to build a cp if transport mode is specified. */
                if (orb->tm.b) {
                        io_region->ret_code = -EOPNOTSUPP;
+                       errstr = "transport mode";
                        goto err_out;
                }
                io_region->ret_code = cp_init(&private->cp, mdev_dev(mdev),
                                              orb);
-               if (io_region->ret_code)
+               if (io_region->ret_code) {
+                       errstr = "cp init";
                        goto err_out;
+               }
 
                io_region->ret_code = cp_prefetch(&private->cp);
                if (io_region->ret_code) {
+                       errstr = "cp prefetch";
                        cp_free(&private->cp);
                        goto err_out;
                }
@@ -148,6 +160,7 @@ static void fsm_io_request(struct vfio_ccw_private *private,
                /* Start channel program and wait for I/O interrupt. */
                io_region->ret_code = fsm_io_helper(private);
                if (io_region->ret_code) {
+                       errstr = "cp fsm_io_helper";
                        cp_free(&private->cp);
                        goto err_out;
                }
@@ -164,6 +177,8 @@ static void fsm_io_request(struct vfio_ccw_private *private,
 
 err_out:
        private->state = VFIO_CCW_STATE_IDLE;
+       trace_vfio_ccw_io_fctl(scsw->cmd.fctl, get_schid(private),
+                              io_region->ret_code, errstr);
 }
 
 /*
diff --git a/drivers/s390/cio/vfio_ccw_trace.h b/drivers/s390/cio/vfio_ccw_trace.h
new file mode 100644 (file)
index 0000000..b1da53d
--- /dev/null
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Tracepoints for vfio_ccw driver
+ *
+ * Copyright IBM Corp. 2018
+ *
+ * Author(s): Dong Jia Shi <bjsdjshi@linux.vnet.ibm.com>
+ *            Halil Pasic <pasic@linux.vnet.ibm.com>
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vfio_ccw
+
+#if !defined(_VFIO_CCW_TRACE_) || defined(TRACE_HEADER_MULTI_READ)
+#define _VFIO_CCW_TRACE_
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(vfio_ccw_io_fctl,
+       TP_PROTO(int fctl, struct subchannel_id schid, int errno, char *errstr),
+       TP_ARGS(fctl, schid, errno, errstr),
+
+       TP_STRUCT__entry(
+               __field(int, fctl)
+               __field_struct(struct subchannel_id, schid)
+               __field(int, errno)
+               __field(char*, errstr)
+       ),
+
+       TP_fast_assign(
+               __entry->fctl = fctl;
+               __entry->schid = schid;
+               __entry->errno = errno;
+               __entry->errstr = errstr;
+       ),
+
+       TP_printk("schid=%x.%x.%04x fctl=%x errno=%d info=%s",
+                 __entry->schid.cssid,
+                 __entry->schid.ssid,
+                 __entry->schid.sch_no,
+                 __entry->fctl,
+                 __entry->errno,
+                 __entry->errstr)
+);
+
+#endif /* _VFIO_CCW_TRACE_ */
+
+/* This part must be outside protection */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE vfio_ccw_trace
+
+#include <trace/define_trace.h>
index 1da3d71..1394810 100644 (file)
@@ -3592,7 +3592,7 @@ fc_bsg_job_timeout(struct request *req)
 
        /* the blk_end_sync_io() doesn't check the error */
        if (inflight)
-               blk_mq_complete_request(req);
+               __blk_complete_request(req);
        return BLK_EH_DONE;
 }
 
index 36f59a1..61389bd 100644 (file)
@@ -654,10 +654,17 @@ static int scsifront_dev_reset_handler(struct scsi_cmnd *sc)
 static int scsifront_sdev_configure(struct scsi_device *sdev)
 {
        struct vscsifrnt_info *info = shost_priv(sdev->host);
+       int err;
 
-       if (info && current == info->curr)
-               xenbus_printf(XBT_NIL, info->dev->nodename,
+       if (info && current == info->curr) {
+               err = xenbus_printf(XBT_NIL, info->dev->nodename,
                              info->dev_state_path, "%d", XenbusStateConnected);
+               if (err) {
+                       xenbus_dev_error(info->dev, err,
+                               "%s: writing dev_state_path", __func__);
+                       return err;
+               }
+       }
 
        return 0;
 }
@@ -665,10 +672,15 @@ static int scsifront_sdev_configure(struct scsi_device *sdev)
 static void scsifront_sdev_destroy(struct scsi_device *sdev)
 {
        struct vscsifrnt_info *info = shost_priv(sdev->host);
+       int err;
 
-       if (info && current == info->curr)
-               xenbus_printf(XBT_NIL, info->dev->nodename,
+       if (info && current == info->curr) {
+               err = xenbus_printf(XBT_NIL, info->dev->nodename,
                              info->dev_state_path, "%d", XenbusStateClosed);
+               if (err)
+                       xenbus_dev_error(info->dev, err,
+                               "%s: writing dev_state_path", __func__);
+       }
 }
 
 static struct scsi_host_template scsifront_sht = {
@@ -1003,9 +1015,12 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
 
                        if (scsi_add_device(info->host, chn, tgt, lun)) {
                                dev_err(&dev->dev, "scsi_add_device\n");
-                               xenbus_printf(XBT_NIL, dev->nodename,
+                               err = xenbus_printf(XBT_NIL, dev->nodename,
                                              info->dev_state_path,
                                              "%d", XenbusStateClosed);
+                               if (err)
+                                       xenbus_dev_error(dev, err,
+                                               "%s: writing dev_state_path", __func__);
                        }
                        break;
                case VSCSIFRONT_OP_DEL_LUN:
@@ -1019,10 +1034,14 @@ static void scsifront_do_lun_hotplug(struct vscsifrnt_info *info, int op)
                        }
                        break;
                case VSCSIFRONT_OP_READD_LUN:
-                       if (device_state == XenbusStateConnected)
-                               xenbus_printf(XBT_NIL, dev->nodename,
+                       if (device_state == XenbusStateConnected) {
+                               err = xenbus_printf(XBT_NIL, dev->nodename,
                                              info->dev_state_path,
                                              "%d", XenbusStateConnected);
+                               if (err)
+                                       xenbus_dev_error(dev, err,
+                                               "%s: writing dev_state_path", __func__);
+                       }
                        break;
                default:
                        break;
index 451e833..48b1542 100644 (file)
@@ -41,4 +41,4 @@ obj-$(CONFIG_XEN_PVCALLS_FRONTEND)    += pvcalls-front.o
 xen-evtchn-y                           := evtchn.o
 xen-gntdev-y                           := gntdev.o
 xen-gntalloc-y                         := gntalloc.o
-xen-privcmd-y                          := privcmd.o
+xen-privcmd-y                          := privcmd.o privcmd-buf.o
index 762378f..08e4af0 100644 (file)
@@ -628,8 +628,6 @@ static void __unbind_from_irq(unsigned int irq)
                xen_irq_info_cleanup(info);
        }
 
-       BUG_ON(info_for_irq(irq)->type == IRQT_UNBOUND);
-
        xen_free_irq(irq);
 }
 
index 2473b0a..ba9f3ee 100644 (file)
@@ -799,7 +799,7 @@ int gnttab_alloc_pages(int nr_pages, struct page **pages)
 
        return 0;
 }
-EXPORT_SYMBOL(gnttab_alloc_pages);
+EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
 
 /**
  * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
@@ -820,7 +820,7 @@ void gnttab_free_pages(int nr_pages, struct page **pages)
        }
        free_xenballooned_pages(nr_pages, pages);
 }
-EXPORT_SYMBOL(gnttab_free_pages);
+EXPORT_SYMBOL_GPL(gnttab_free_pages);
 
 /* Handling of paged out grant targets (GNTST_eagain) */
 #define MAX_DELAY 256
index 8835065..c93d8ef 100644 (file)
@@ -289,8 +289,15 @@ static void sysrq_handler(struct xenbus_watch *watch, const char *path,
                return;
        }
 
-       if (sysrq_key != '\0')
-               xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+       if (sysrq_key != '\0') {
+               err = xenbus_printf(xbt, "control", "sysrq", "%c", '\0');
+               if (err) {
+                       pr_err("%s: Error %d writing sysrq in control/sysrq\n",
+                              __func__, err);
+                       xenbus_transaction_end(xbt, 1);
+                       return;
+               }
+       }
 
        err = xenbus_transaction_end(xbt, 0);
        if (err == -EAGAIN)
@@ -342,7 +349,12 @@ static int setup_shutdown_watcher(void)
                        continue;
                snprintf(node, FEATURE_PATH_SIZE, "feature-%s",
                         shutdown_handlers[idx].command);
-               xenbus_printf(XBT_NIL, "control", node, "%u", 1);
+               err = xenbus_printf(XBT_NIL, "control", node, "%u", 1);
+               if (err) {
+                       pr_err("%s: Error %d writing %s\n", __func__,
+                               err, node);
+                       return err;
+               }
        }
 
        return 0;
diff --git a/drivers/xen/privcmd-buf.c b/drivers/xen/privcmd-buf.c
new file mode 100644 (file)
index 0000000..df1ed37
--- /dev/null
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+
+/******************************************************************************
+ * privcmd-buf.c
+ *
+ * Mmap of hypercall buffers.
+ *
+ * Copyright (c) 2018 Juergen Gross
+ */
+
+#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "privcmd.h"
+
+MODULE_LICENSE("GPL");
+
+static unsigned int limit = 64;
+module_param(limit, uint, 0644);
+MODULE_PARM_DESC(limit, "Maximum number of pages that may be allocated by "
+                       "the privcmd-buf device per open file");
+
+struct privcmd_buf_private {
+       struct mutex lock;
+       struct list_head list;
+       unsigned int allocated;
+};
+
+struct privcmd_buf_vma_private {
+       struct privcmd_buf_private *file_priv;
+       struct list_head list;
+       unsigned int users;
+       unsigned int n_pages;
+       struct page *pages[];
+};
+
+static int privcmd_buf_open(struct inode *ino, struct file *file)
+{
+       struct privcmd_buf_private *file_priv;
+
+       file_priv = kzalloc(sizeof(*file_priv), GFP_KERNEL);
+       if (!file_priv)
+               return -ENOMEM;
+
+       mutex_init(&file_priv->lock);
+       INIT_LIST_HEAD(&file_priv->list);
+
+       file->private_data = file_priv;
+
+       return 0;
+}
+
+static void privcmd_buf_vmapriv_free(struct privcmd_buf_vma_private *vma_priv)
+{
+       unsigned int i;
+
+       vma_priv->file_priv->allocated -= vma_priv->n_pages;
+
+       list_del(&vma_priv->list);
+
+       for (i = 0; i < vma_priv->n_pages; i++)
+               if (vma_priv->pages[i])
+                       __free_page(vma_priv->pages[i]);
+
+       kfree(vma_priv);
+}
+
+static int privcmd_buf_release(struct inode *ino, struct file *file)
+{
+       struct privcmd_buf_private *file_priv = file->private_data;
+       struct privcmd_buf_vma_private *vma_priv;
+
+       mutex_lock(&file_priv->lock);
+
+       while (!list_empty(&file_priv->list)) {
+               vma_priv = list_first_entry(&file_priv->list,
+                                           struct privcmd_buf_vma_private,
+                                           list);
+               privcmd_buf_vmapriv_free(vma_priv);
+       }
+
+       mutex_unlock(&file_priv->lock);
+
+       kfree(file_priv);
+
+       return 0;
+}
+
+static void privcmd_buf_vma_open(struct vm_area_struct *vma)
+{
+       struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+
+       if (!vma_priv)
+               return;
+
+       mutex_lock(&vma_priv->file_priv->lock);
+       vma_priv->users++;
+       mutex_unlock(&vma_priv->file_priv->lock);
+}
+
+static void privcmd_buf_vma_close(struct vm_area_struct *vma)
+{
+       struct privcmd_buf_vma_private *vma_priv = vma->vm_private_data;
+       struct privcmd_buf_private *file_priv;
+
+       if (!vma_priv)
+               return;
+
+       file_priv = vma_priv->file_priv;
+
+       mutex_lock(&file_priv->lock);
+
+       vma_priv->users--;
+       if (!vma_priv->users)
+               privcmd_buf_vmapriv_free(vma_priv);
+
+       mutex_unlock(&file_priv->lock);
+}
+
+static vm_fault_t privcmd_buf_vma_fault(struct vm_fault *vmf)
+{
+       pr_debug("fault: vma=%p %lx-%lx, pgoff=%lx, uv=%p\n",
+                vmf->vma, vmf->vma->vm_start, vmf->vma->vm_end,
+                vmf->pgoff, (void *)vmf->address);
+
+       return VM_FAULT_SIGBUS;
+}
+
+static const struct vm_operations_struct privcmd_buf_vm_ops = {
+       .open = privcmd_buf_vma_open,
+       .close = privcmd_buf_vma_close,
+       .fault = privcmd_buf_vma_fault,
+};
+
+static int privcmd_buf_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct privcmd_buf_private *file_priv = file->private_data;
+       struct privcmd_buf_vma_private *vma_priv;
+       unsigned long count = vma_pages(vma);
+       unsigned int i;
+       int ret = 0;
+
+       if (!(vma->vm_flags & VM_SHARED) || count > limit ||
+           file_priv->allocated + count > limit)
+               return -EINVAL;
+
+       vma_priv = kzalloc(sizeof(*vma_priv) + count * sizeof(void *),
+                          GFP_KERNEL);
+       if (!vma_priv)
+               return -ENOMEM;
+
+       vma_priv->n_pages = count;
+       count = 0;
+       for (i = 0; i < vma_priv->n_pages; i++) {
+               vma_priv->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
+               if (!vma_priv->pages[i])
+                       break;
+               count++;
+       }
+
+       mutex_lock(&file_priv->lock);
+
+       file_priv->allocated += count;
+
+       vma_priv->file_priv = file_priv;
+       vma_priv->users = 1;
+
+       vma->vm_flags |= VM_IO | VM_DONTEXPAND;
+       vma->vm_ops = &privcmd_buf_vm_ops;
+       vma->vm_private_data = vma_priv;
+
+       list_add(&vma_priv->list, &file_priv->list);
+
+       if (vma_priv->n_pages != count)
+               ret = -ENOMEM;
+       else
+               for (i = 0; i < vma_priv->n_pages; i++) {
+                       ret = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
+                                            vma_priv->pages[i]);
+                       if (ret)
+                               break;
+               }
+
+       if (ret)
+               privcmd_buf_vmapriv_free(vma_priv);
+
+       mutex_unlock(&file_priv->lock);
+
+       return ret;
+}
+
+const struct file_operations xen_privcmdbuf_fops = {
+       .owner = THIS_MODULE,
+       .open = privcmd_buf_open,
+       .release = privcmd_buf_release,
+       .mmap = privcmd_buf_mmap,
+};
+EXPORT_SYMBOL_GPL(xen_privcmdbuf_fops);
+
+struct miscdevice xen_privcmdbuf_dev = {
+       .minor = MISC_DYNAMIC_MINOR,
+       .name = "xen/hypercall",
+       .fops = &xen_privcmdbuf_fops,
+};
index 8ae0349..7e6e682 100644 (file)
@@ -1007,12 +1007,21 @@ static int __init privcmd_init(void)
                pr_err("Could not register Xen privcmd device\n");
                return err;
        }
+
+       err = misc_register(&xen_privcmdbuf_dev);
+       if (err != 0) {
+               pr_err("Could not register Xen hypercall-buf device\n");
+               misc_deregister(&privcmd_dev);
+               return err;
+       }
+
        return 0;
 }
 
 static void __exit privcmd_exit(void)
 {
        misc_deregister(&privcmd_dev);
+       misc_deregister(&xen_privcmdbuf_dev);
 }
 
 module_init(privcmd_init);
index 14facae..0dd9f8f 100644 (file)
@@ -1,3 +1,6 @@
 #include <linux/fs.h>
 
 extern const struct file_operations xen_privcmd_fops;
+extern const struct file_operations xen_privcmdbuf_fops;
+
+extern struct miscdevice xen_privcmdbuf_dev;
index 7bc88fd..e2f3e8b 100644 (file)
@@ -1012,6 +1012,7 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
 {
        struct v2p_entry *entry;
        unsigned long flags;
+       int err;
 
        if (try) {
                spin_lock_irqsave(&info->v2p_lock, flags);
@@ -1027,8 +1028,11 @@ static void scsiback_do_add_lun(struct vscsibk_info *info, const char *state,
                        scsiback_del_translation_entry(info, vir);
                }
        } else if (!try) {
-               xenbus_printf(XBT_NIL, info->dev->nodename, state,
+               err = xenbus_printf(XBT_NIL, info->dev->nodename, state,
                              "%d", XenbusStateClosed);
+               if (err)
+                       xenbus_dev_error(info->dev, err,
+                               "%s: writing %s", __func__, state);
        }
 }
 
@@ -1067,8 +1071,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
        snprintf(str, sizeof(str), "vscsi-devs/%s/p-dev", ent);
        val = xenbus_read(XBT_NIL, dev->nodename, str, NULL);
        if (IS_ERR(val)) {
-               xenbus_printf(XBT_NIL, dev->nodename, state,
+               err = xenbus_printf(XBT_NIL, dev->nodename, state,
                              "%d", XenbusStateClosed);
+               if (err)
+                       xenbus_dev_error(info->dev, err,
+                               "%s: writing %s", __func__, state);
                return;
        }
        strlcpy(phy, val, VSCSI_NAMELEN);
@@ -1079,8 +1086,11 @@ static void scsiback_do_1lun_hotplug(struct vscsibk_info *info, int op,
        err = xenbus_scanf(XBT_NIL, dev->nodename, str, "%u:%u:%u:%u",
                           &vir.hst, &vir.chn, &vir.tgt, &vir.lun);
        if (XENBUS_EXIST_ERR(err)) {
-               xenbus_printf(XBT_NIL, dev->nodename, state,
+               err = xenbus_printf(XBT_NIL, dev->nodename, state,
                              "%d", XenbusStateClosed);
+               if (err)
+                       xenbus_dev_error(info->dev, err,
+                               "%s: writing %s", __func__, state);
                return;
        }
 
index cc40802..00e759f 100644 (file)
@@ -748,7 +748,6 @@ extern void ext2_free_blocks (struct inode *, unsigned long,
                              unsigned long);
 extern unsigned long ext2_count_free_blocks (struct super_block *);
 extern unsigned long ext2_count_dirs (struct super_block *);
-extern void ext2_check_blocks_bitmap (struct super_block *);
 extern struct ext2_group_desc * ext2_get_group_desc(struct super_block * sb,
                                                    unsigned int block_group,
                                                    struct buffer_head ** bh);
@@ -771,7 +770,6 @@ extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page
 extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
 extern void ext2_free_inode (struct inode *);
 extern unsigned long ext2_count_free_inodes (struct super_block *);
-extern void ext2_check_inodes_bitmap (struct super_block *);
 extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 
 /* inode.c */
index 25ab127..8ff53f8 100644 (file)
@@ -557,6 +557,9 @@ static int parse_options(char *options, struct super_block *sb,
                        set_opt (opts->s_mount_opt, NO_UID32);
                        break;
                case Opt_nocheck:
+                       ext2_msg(sb, KERN_WARNING,
+                               "Option nocheck/check=none is deprecated and"
+                               " will be removed in June 2020.");
                        clear_opt (opts->s_mount_opt, CHECK);
                        break;
                case Opt_debug:
@@ -1335,9 +1338,6 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
        new_opts.s_resgid = sbi->s_resgid;
        spin_unlock(&sbi->s_lock);
 
-       /*
-        * Allow the "check" option to be passed as a remount option.
-        */
        if (!parse_options(data, sb, &new_opts))
                return -EINVAL;
 
index c60f3d3..a679798 100644 (file)
@@ -491,15 +491,17 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size)
        if (size > PSIZE) {
                /*
                 * To keep the rest of the code simple.  Allocate a
-                * contiguous buffer to work with
+                * contiguous buffer to work with. Make the buffer large
+                * enough to make use of the whole extent.
                 */
-               ea_buf->xattr = kmalloc(size, GFP_KERNEL);
+               ea_buf->max_size = (size + sb->s_blocksize - 1) &
+                   ~(sb->s_blocksize - 1);
+
+               ea_buf->xattr = kmalloc(ea_buf->max_size, GFP_KERNEL);
                if (ea_buf->xattr == NULL)
                        return -ENOMEM;
 
                ea_buf->flag = EA_MALLOC;
-               ea_buf->max_size = (size + sb->s_blocksize - 1) &
-                   ~(sb->s_blocksize - 1);
 
                if (ea_size == 0)
                        return 0;
index bbd0465..f033f3a 100644 (file)
@@ -883,8 +883,10 @@ struct inode *nfs_delegation_find_inode(struct nfs_client *clp,
        rcu_read_lock();
        list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
                res = nfs_delegation_find_inode_server(server, fhandle);
-               if (res != ERR_PTR(-ENOENT))
+               if (res != ERR_PTR(-ENOENT)) {
+                       rcu_read_unlock();
                        return res;
+               }
        }
        rcu_read_unlock();
        return ERR_PTR(-ENOENT);
index d4a07ac..8f00379 100644 (file)
@@ -1243,17 +1243,18 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
                                           hdr->ds_clp, hdr->lseg,
                                           hdr->pgio_mirror_idx);
 
+       clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+       clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS:
                if (ff_layout_choose_best_ds_for_read(hdr->lseg,
                                        hdr->pgio_mirror_idx + 1,
                                        &hdr->pgio_mirror_idx))
                        goto out_eagain;
-               ff_layout_read_record_layoutstats_done(task, hdr);
-               pnfs_read_resend_pnfs(hdr);
+               set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
                return task->tk_status;
        case -NFS4ERR_RESET_TO_MDS:
-               ff_layout_reset_read(hdr);
+               set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
                return task->tk_status;
        case -EAGAIN:
                goto out_eagain;
@@ -1403,6 +1404,10 @@ static void ff_layout_read_release(void *data)
        struct nfs_pgio_header *hdr = data;
 
        ff_layout_read_record_layoutstats_done(&hdr->task, hdr);
+       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+               pnfs_read_resend_pnfs(hdr);
+       else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+               ff_layout_reset_read(hdr);
        pnfs_generic_rw_release(data);
 }
 
@@ -1423,12 +1428,14 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
                                           hdr->ds_clp, hdr->lseg,
                                           hdr->pgio_mirror_idx);
 
+       clear_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
+       clear_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
        switch (err) {
        case -NFS4ERR_RESET_TO_PNFS:
-               ff_layout_reset_write(hdr, true);
+               set_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags);
                return task->tk_status;
        case -NFS4ERR_RESET_TO_MDS:
-               ff_layout_reset_write(hdr, false);
+               set_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags);
                return task->tk_status;
        case -EAGAIN:
                return -EAGAIN;
@@ -1575,6 +1582,10 @@ static void ff_layout_write_release(void *data)
        struct nfs_pgio_header *hdr = data;
 
        ff_layout_write_record_layoutstats_done(&hdr->task, hdr);
+       if (test_bit(NFS_IOHDR_RESEND_PNFS, &hdr->flags))
+               ff_layout_reset_write(hdr, true);
+       else if (test_bit(NFS_IOHDR_RESEND_MDS, &hdr->flags))
+               ff_layout_reset_write(hdr, false);
        pnfs_generic_rw_release(data);
 }
 
index ed45090..6dd1468 100644 (file)
@@ -3294,6 +3294,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
        struct nfs4_closedata *calldata = data;
        struct nfs4_state *state = calldata->state;
        struct inode *inode = calldata->inode;
+       struct pnfs_layout_hdr *lo;
        bool is_rdonly, is_wronly, is_rdwr;
        int call_close = 0;
 
@@ -3337,6 +3338,12 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
                goto out_wait;
        }
 
+       lo = calldata->arg.lr_args ? calldata->arg.lr_args->layout : NULL;
+       if (lo && !pnfs_layout_is_valid(lo)) {
+               calldata->arg.lr_args = NULL;
+               calldata->res.lr_res = NULL;
+       }
+
        if (calldata->arg.fmode == 0)
                task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
 
@@ -5972,12 +5979,19 @@ static void nfs4_delegreturn_release(void *calldata)
 static void nfs4_delegreturn_prepare(struct rpc_task *task, void *data)
 {
        struct nfs4_delegreturndata *d_data;
+       struct pnfs_layout_hdr *lo;
 
        d_data = (struct nfs4_delegreturndata *)data;
 
        if (!d_data->lr.roc && nfs4_wait_on_layoutreturn(d_data->inode, task))
                return;
 
+       lo = d_data->args.lr_args ? d_data->args.lr_args->layout : NULL;
+       if (lo && !pnfs_layout_is_valid(lo)) {
+               d_data->args.lr_args = NULL;
+               d_data->res.lr_res = NULL;
+       }
+
        nfs4_setup_sequence(d_data->res.server->nfs_client,
                        &d_data->args.seq_args,
                        &d_data->res.seq_res,
@@ -8650,6 +8664,8 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 
        dprintk("--> %s tk_status => %d\n", __func__, -task->tk_status);
 
+       nfs4_sequence_free_slot(&lgp->res.seq_res);
+
        switch (nfs4err) {
        case 0:
                goto out;
@@ -8714,7 +8730,6 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
                goto out;
        }
 
-       nfs4_sequence_free_slot(&lgp->res.seq_res);
        err = nfs4_handle_exception(server, nfs4err, exception);
        if (!status) {
                if (exception->retry)
@@ -8786,20 +8801,22 @@ nfs4_proc_layoutget(struct nfs4_layoutget *lgp, long *timeout)
        if (IS_ERR(task))
                return ERR_CAST(task);
        status = rpc_wait_for_completion_task(task);
-       if (status == 0) {
+       if (status != 0)
+               goto out;
+
+       /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
+       if (task->tk_status < 0 || lgp->res.layoutp->len == 0) {
                status = nfs4_layoutget_handle_exception(task, lgp, &exception);
                *timeout = exception.timeout;
-       }
-
+       } else
+               lseg = pnfs_layout_process(lgp);
+out:
        trace_nfs4_layoutget(lgp->args.ctx,
                        &lgp->args.range,
                        &lgp->res.range,
                        &lgp->res.stateid,
                        status);
 
-       /* if layoutp->len is 0, nfs4_layoutget_prepare called rpc_exit */
-       if (status == 0 && lgp->res.layoutp->len)
-               lseg = pnfs_layout_process(lgp);
        rpc_put_task(task);
        dprintk("<-- %s status=%d\n", __func__, status);
        if (status)
@@ -8817,6 +8834,8 @@ nfs4_layoutreturn_prepare(struct rpc_task *task, void *calldata)
                        &lrp->args.seq_args,
                        &lrp->res.seq_res,
                        task);
+       if (!pnfs_layout_is_valid(lrp->args.layout))
+               rpc_exit(task, 0);
 }
 
 static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
index a8f5e6b..3fe8142 100644 (file)
@@ -801,6 +801,11 @@ static inline void nfs4_lgopen_release(struct nfs4_layoutget *lgp)
 {
 }
 
+static inline bool pnfs_layout_is_valid(const struct pnfs_layout_hdr *lo)
+{
+       return false;
+}
+
 #endif /* CONFIG_NFS_V4_1 */
 
 #if IS_ENABLED(CONFIG_NFS_V4_2)
index b657294..aaffc0c 100644 (file)
@@ -235,6 +235,10 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
        if (env_start != arg_end || env_start >= env_end)
                env_start = env_end = arg_end;
 
+       /* .. and limit it to a maximum of one page of slop */
+       if (env_end >= arg_end + PAGE_SIZE)
+               env_end = arg_end + PAGE_SIZE - 1;
+
        /* We're not going to care if "*ppos" has high bits set */
        pos = arg_start + *ppos;
 
@@ -254,10 +258,19 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
        while (count) {
                int got;
                size_t size = min_t(size_t, PAGE_SIZE, count);
+               long offset;
 
-               got = access_remote_vm(mm, pos, page, size, FOLL_ANON);
-               if (got <= 0)
+               /*
+                * Are we already starting past the official end?
+                * We always include the last byte that is *supposed*
+                * to be NUL
+                */
+               offset = (pos >= arg_end) ? pos - arg_end + 1 : 0;
+
+               got = access_remote_vm(mm, pos - offset, page, size + offset, FOLL_ANON);
+               if (got <= offset)
                        break;
+               got -= offset;
 
                /* Don't walk past a NUL character once you hit arg_end */
                if (pos + got >= arg_end) {
@@ -276,12 +289,17 @@ static ssize_t get_mm_cmdline(struct mm_struct *mm, char __user *buf,
                                n = arg_end - pos - 1;
 
                        /* Cut off at first NUL after 'n' */
-                       got = n + strnlen(page+n, got-n);
-                       if (!got)
+                       got = n + strnlen(page+n, offset+got-n);
+                       if (got < offset)
                                break;
+                       got -= offset;
+
+                       /* Include the NUL if it existed */
+                       if (got < size)
+                               got++;
                }
 
-               got -= copy_to_user(buf, page, got);
+               got -= copy_to_user(buf, page+offset, got);
                if (unlikely(!got)) {
                        if (!len)
                                len = -EFAULT;
index d88231e..fc20e06 100644 (file)
@@ -711,21 +711,18 @@ EXPORT_SYMBOL(dquot_quota_sync);
 static unsigned long
 dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
-       struct list_head *head;
        struct dquot *dquot;
        unsigned long freed = 0;
 
        spin_lock(&dq_list_lock);
-       head = free_dquots.prev;
-       while (head != &free_dquots && sc->nr_to_scan) {
-               dquot = list_entry(head, struct dquot, dq_free);
+       while (!list_empty(&free_dquots) && sc->nr_to_scan) {
+               dquot = list_first_entry(&free_dquots, struct dquot, dq_free);
                remove_dquot_hash(dquot);
                remove_free_dquot(dquot);
                remove_inuse(dquot);
                do_destroy_dquot(dquot);
                sc->nr_to_scan--;
                freed++;
-               head = free_dquots.prev;
        }
        spin_unlock(&dq_list_lock);
        return freed;
index 1b961b1..fcda0fc 100644 (file)
@@ -533,8 +533,7 @@ static int udf_table_prealloc_blocks(struct super_block *sb,
                        udf_write_aext(table, &epos, &eloc,
                                        (etype << 30) | elen, 1);
                } else
-                       udf_delete_aext(table, epos, eloc,
-                                       (etype << 30) | elen);
+                       udf_delete_aext(table, epos);
        } else {
                alloc_count = 0;
        }
@@ -630,7 +629,7 @@ static udf_pblk_t udf_table_new_block(struct super_block *sb,
        if (goal_elen)
                udf_write_aext(table, &goal_epos, &goal_eloc, goal_elen, 1);
        else
-               udf_delete_aext(table, goal_epos, goal_eloc, goal_elen);
+               udf_delete_aext(table, goal_epos);
        brelse(goal_epos.bh);
 
        udf_add_free_space(sb, partition, -1);
index 0a98a23..d952301 100644 (file)
@@ -141,10 +141,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
                               fibh->ebh->b_data,
                               sizeof(struct fileIdentDesc) + fibh->soffset);
 
-                       fi_len = (sizeof(struct fileIdentDesc) +
-                                 cfi->lengthFileIdent +
-                                 le16_to_cpu(cfi->lengthOfImpUse) + 3) & ~3;
-
+                       fi_len = udf_dir_entry_len(cfi);
                        *nf_pos += fi_len - (fibh->eoffset - fibh->soffset);
                        fibh->eoffset = fibh->soffset + fi_len;
                } else {
@@ -152,6 +149,9 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos,
                               sizeof(struct fileIdentDesc));
                }
        }
+       /* Got last entry outside of dir size - fs is corrupted! */
+       if (*nf_pos > dir->i_size)
+               return NULL;
        return fi;
 }
 
index 7f39d17..9915a58 100644 (file)
@@ -1147,8 +1147,7 @@ static void udf_update_extents(struct inode *inode, struct kernel_long_ad *laarr
 
        if (startnum > endnum) {
                for (i = 0; i < (startnum - endnum); i++)
-                       udf_delete_aext(inode, *epos, laarr[i].extLocation,
-                                       laarr[i].extLength);
+                       udf_delete_aext(inode, *epos);
        } else if (startnum < endnum) {
                for (i = 0; i < (endnum - startnum); i++) {
                        udf_insert_aext(inode, *epos, laarr[i].extLocation,
@@ -2176,14 +2175,15 @@ static int8_t udf_insert_aext(struct inode *inode, struct extent_position epos,
        return (nelen >> 30);
 }
 
-int8_t udf_delete_aext(struct inode *inode, struct extent_position epos,
-                      struct kernel_lb_addr eloc, uint32_t elen)
+int8_t udf_delete_aext(struct inode *inode, struct extent_position epos)
 {
        struct extent_position oepos;
        int adsize;
        int8_t etype;
        struct allocExtDesc *aed;
        struct udf_inode_info *iinfo;
+       struct kernel_lb_addr eloc;
+       uint32_t elen;
 
        if (epos.bh) {
                get_bh(epos.bh);
index c586026..06f37dd 100644 (file)
@@ -351,8 +351,6 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
        loff_t f_pos;
        loff_t size = udf_ext0_offset(dir) + dir->i_size;
        int nfidlen;
-       uint8_t lfi;
-       uint16_t liu;
        udf_pblk_t block;
        struct kernel_lb_addr eloc;
        uint32_t elen = 0;
@@ -383,7 +381,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
                namelen = 0;
        }
 
-       nfidlen = (sizeof(struct fileIdentDesc) + namelen + 3) & ~3;
+       nfidlen = ALIGN(sizeof(struct fileIdentDesc) + namelen, UDF_NAME_PAD);
 
        f_pos = udf_ext0_offset(dir);
 
@@ -424,12 +422,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
                        goto out_err;
                }
 
-               liu = le16_to_cpu(cfi->lengthOfImpUse);
-               lfi = cfi->lengthFileIdent;
-
                if ((cfi->fileCharacteristics & FID_FILE_CHAR_DELETED) != 0) {
-                       if (((sizeof(struct fileIdentDesc) +
-                                       liu + lfi + 3) & ~3) == nfidlen) {
+                       if (udf_dir_entry_len(cfi) == nfidlen) {
                                cfi->descTag.tagSerialNum = cpu_to_le16(1);
                                cfi->fileVersionNum = cpu_to_le16(1);
                                cfi->fileCharacteristics = 0;
@@ -1201,9 +1195,7 @@ static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
 
        if (dir_fi) {
                dir_fi->icb.extLocation = cpu_to_lelb(UDF_I(new_dir)->i_location);
-               udf_update_tag((char *)dir_fi,
-                               (sizeof(struct fileIdentDesc) +
-                               le16_to_cpu(dir_fi->lengthOfImpUse) + 3) & ~3);
+               udf_update_tag((char *)dir_fi, udf_dir_entry_len(dir_fi));
                if (old_iinfo->i_alloc_type == ICBTAG_FLAG_AD_IN_ICB)
                        mark_inode_dirty(old_inode);
                else
index bae311b..84c47dd 100644 (file)
@@ -132,6 +132,12 @@ struct inode *udf_find_metadata_inode_efe(struct super_block *sb,
 extern int udf_write_fi(struct inode *inode, struct fileIdentDesc *,
                        struct fileIdentDesc *, struct udf_fileident_bh *,
                        uint8_t *, uint8_t *);
+static inline unsigned int udf_dir_entry_len(struct fileIdentDesc *cfi)
+{
+       return ALIGN(sizeof(struct fileIdentDesc) +
+               le16_to_cpu(cfi->lengthOfImpUse) + cfi->lengthFileIdent,
+               UDF_NAME_PAD);
+}
 
 /* file.c */
 extern long udf_ioctl(struct file *, unsigned int, unsigned long);
@@ -167,8 +173,7 @@ extern int udf_add_aext(struct inode *, struct extent_position *,
                        struct kernel_lb_addr *, uint32_t, int);
 extern void udf_write_aext(struct inode *, struct extent_position *,
                           struct kernel_lb_addr *, uint32_t, int);
-extern int8_t udf_delete_aext(struct inode *, struct extent_position,
-                             struct kernel_lb_addr, uint32_t);
+extern int8_t udf_delete_aext(struct inode *, struct extent_position);
 extern int8_t udf_next_aext(struct inode *, struct extent_position *,
                            struct kernel_lb_addr *, uint32_t *, int);
 extern int8_t udf_current_aext(struct inode *, struct extent_position *,
index 40a916e..1194a4c 100644 (file)
@@ -309,7 +309,7 @@ static inline void acpi_processor_ppc_exit(void)
 {
        return;
 }
-static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
+static inline void acpi_processor_ppc_has_changed(struct acpi_processor *pr,
                                                                int event_flag)
 {
        static unsigned int printout = 1;
@@ -320,7 +320,6 @@ static inline int acpi_processor_ppc_has_changed(struct acpi_processor *pr,
                       "Consider compiling CPUfreq support into your kernel.\n");
                printout = 0;
        }
-       return 0;
 }
 static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit)
 {
index 0763f06..d10f1e7 100644 (file)
@@ -63,7 +63,7 @@ typedef struct qspinlock {
 /*
  * Initializier
  */
-#define        __ARCH_SPIN_LOCK_UNLOCKED       { .val = ATOMIC_INIT(0) }
+#define        __ARCH_SPIN_LOCK_UNLOCKED       { { .val = ATOMIC_INIT(0) } }
 
 /*
  * Bitfields in the atomic value:
index 0c27515..8124815 100644 (file)
@@ -214,6 +214,7 @@ struct atmphy_ops {
 struct atm_skb_data {
        struct atm_vcc  *vcc;           /* ATM VCC */
        unsigned long   atm_options;    /* ATM layer options */
+       unsigned int    acct_truesize;  /* truesize accounted to vcc */
 };
 
 #define VCC_HTABLE_SIZE 32
@@ -241,6 +242,20 @@ void vcc_insert_socket(struct sock *sk);
 
 void atm_dev_release_vccs(struct atm_dev *dev);
 
+static inline void atm_account_tx(struct atm_vcc *vcc, struct sk_buff *skb)
+{
+       /*
+        * Because ATM skbs may not belong to a sock (and we don't
+        * necessarily want to), skb->truesize may be adjusted,
+        * escaping the hack in pskb_expand_head() which avoids
+        * doing so for some cases. So stash the value of truesize
+        * at the time we accounted it, and atm_pop_raw() can use
+        * that value later, in case it changes.
+        */
+       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
+       ATM_SKB(skb)->acct_truesize = skb->truesize;
+       ATM_SKB(skb)->atm_options = vcc->atm_options;
+}
 
 static inline void atm_force_charge(struct atm_vcc *vcc,int truesize)
 {
index 0bd432a..2425176 100644 (file)
@@ -22,7 +22,6 @@ struct dentry;
  */
 enum wb_state {
        WB_registered,          /* bdi_register() was done */
-       WB_shutting_down,       /* wb_shutdown() in progress */
        WB_writeback_running,   /* Writeback is in progress */
        WB_has_dirty_io,        /* Dirty inodes on ->b_{dirty|io|more_io} */
        WB_start_all,           /* nr_pages == 0 (all) work pending */
@@ -189,6 +188,7 @@ struct backing_dev_info {
 #ifdef CONFIG_CGROUP_WRITEBACK
        struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */
        struct rb_root cgwb_congested_tree; /* their congested states */
+       struct mutex cgwb_release_mutex;  /* protect shutdown of wb structs */
 #else
        struct bdi_writeback_congested *wb_congested;
 #endif
index 995c3b1..7df32a3 100644 (file)
@@ -488,12 +488,15 @@ void bpf_patch_call_args(struct bpf_insn *insn, u32 stack_depth);
 
 /* Map specifics */
 struct xdp_buff;
+struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 void __dev_map_insert_ctx(struct bpf_map *map, u32 index);
 void __dev_map_flush(struct bpf_map *map);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
                    struct net_device *dev_rx);
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+                            struct bpf_prog *xdp_prog);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_insert_ctx(struct bpf_map *map, u32 index);
@@ -586,6 +589,15 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return 0;
 }
 
+struct sk_buff;
+
+static inline int dev_map_generic_redirect(struct bpf_dtab_netdev *dst,
+                                          struct sk_buff *skb,
+                                          struct bpf_prog *xdp_prog)
+{
+       return 0;
+}
+
 static inline
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 {
index b67bf6a..3c5a4cb 100644 (file)
@@ -48,7 +48,7 @@
  *   CMA should not be used by the device drivers directly. It is
  *   only a helper framework for dma-mapping subsystem.
  *
- *   For more information, see kernel-docs in drivers/base/dma-contiguous.c
+ *   For more information, see kernel-docs in kernel/dma/contiguous.c
  */
 
 #ifdef __KERNEL__
index 45fc0f5..b615df5 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/cryptohash.h>
 #include <linux/set_memory.h>
 #include <linux/kallsyms.h>
+#include <linux/if_vlan.h>
 
 #include <net/sch_generic.h>
 
@@ -469,7 +470,8 @@ struct sock_fprog_kern {
 };
 
 struct bpf_binary_header {
-       unsigned int pages;
+       u16 pages;
+       u16 locked:1;
        u8 image[];
 };
 
@@ -671,15 +673,18 @@ bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 
 #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 
-#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
        fp->locked = 1;
-       WARN_ON_ONCE(set_memory_ro((unsigned long)fp, fp->pages));
+       if (set_memory_ro((unsigned long)fp, fp->pages))
+               fp->locked = 0;
+#endif
 }
 
 static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
        if (fp->locked) {
                WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
                /* In case set_memory_rw() fails, we want to be the first
@@ -687,34 +692,30 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
                 */
                fp->locked = 0;
        }
+#endif
 }
 
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
-       WARN_ON_ONCE(set_memory_ro((unsigned long)hdr, hdr->pages));
-}
-
-static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
-{
-       WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
-}
-#else
-static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
-{
-}
-
-static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
-{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       hdr->locked = 1;
+       if (set_memory_ro((unsigned long)hdr, hdr->pages))
+               hdr->locked = 0;
+#endif
 }
 
 static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 {
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       if (hdr->locked) {
+               WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
+               /* In case set_memory_rw() fails, we want to be the first
+                * to crash here instead of some random place later on.
+                */
+               hdr->locked = 0;
+       }
+#endif
 }
-#endif /* CONFIG_ARCH_HAS_SET_MEMORY */
 
 static inline struct bpf_binary_header *
 bpf_jit_binary_hdr(const struct bpf_prog *fp)
@@ -725,6 +726,22 @@ bpf_jit_binary_hdr(const struct bpf_prog *fp)
        return (void *)addr;
 }
 
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
+{
+       if (!fp->locked)
+               return -ENOLCK;
+       if (fp->jited) {
+               const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
+
+               if (!hdr->locked)
+                       return -ENOLCK;
+       }
+
+       return 0;
+}
+#endif
+
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
 static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
 {
@@ -786,6 +803,21 @@ static inline bool bpf_dump_raw_ok(void)
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
                                       const struct bpf_insn *patch, u32 len);
 
+static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
+                                          struct net_device *fwd)
+{
+       unsigned int len;
+
+       if (unlikely(!(fwd->flags & IFF_UP)))
+               return -ENETDOWN;
+
+       len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
+       if (skb->len > len)
+               return -EMSGSIZE;
+
+       return 0;
+}
+
 /* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
  * same cpu context. Further for best results no more than a single map
  * for the do_redirect/do_flush pair should be used. This limitation is
@@ -961,6 +993,9 @@ static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
 }
 #endif /* CONFIG_BPF_JIT */
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
+
 #define BPF_ANC                BIT(15)
 
 static inline bool bpf_needs_clear_a(const struct sock_filter *first)
index 4bd2f34..201de12 100644 (file)
@@ -503,6 +503,7 @@ struct irq_chip {
  * IRQCHIP_SKIP_SET_WAKE:      Skip chip.irq_set_wake(), for this irq chip
  * IRQCHIP_ONESHOT_SAFE:       One shot does not require mask/unmask
  * IRQCHIP_EOI_THREADED:       Chip requires eoi() on unmask in threaded mode
+ * IRQCHIP_SUPPORTS_LEVEL_MSI  Chip can provide two doorbells for Level MSIs
  */
 enum {
        IRQCHIP_SET_TYPE_MASKED         = (1 <<  0),
index 25b33b6..dd1e40d 100644 (file)
@@ -145,11 +145,6 @@ static inline void *irq_desc_get_handler_data(struct irq_desc *desc)
        return desc->irq_common_data.handler_data;
 }
 
-static inline struct msi_desc *irq_desc_get_msi_desc(struct irq_desc *desc)
-{
-       return desc->irq_common_data.msi_desc;
-}
-
 /*
  * Architectures call this to let the generic IRQ layer
  * handle an interrupt.
index 9dee3c2..712eed1 100644 (file)
@@ -1438,6 +1438,8 @@ enum {
        NFS_IOHDR_EOF,
        NFS_IOHDR_REDO,
        NFS_IOHDR_STAT,
+       NFS_IOHDR_RESEND_PNFS,
+       NFS_IOHDR_RESEND_MDS,
 };
 
 struct nfs_io_completion;
index 4193c41..a685da2 100644 (file)
@@ -98,5 +98,7 @@ extern __must_check bool refcount_dec_if_one(refcount_t *r);
 extern __must_check bool refcount_dec_not_one(refcount_t *r);
 extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
 extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
-
+extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
+                                                      spinlock_t *lock,
+                                                      unsigned long *flags);
 #endif /* _LINUX_REFCOUNT_H */
index 1e8a464..fd57888 100644 (file)
@@ -427,6 +427,11 @@ extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock);
 #define atomic_dec_and_lock(atomic, lock) \
                __cond_lock(lock, _atomic_dec_and_lock(atomic, lock))
 
+extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+                                       unsigned long *flags);
+#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \
+               __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)))
+
 int alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask,
                           size_t max_size, unsigned int cpu_mult,
                           gfp_t gfp);
index 5cba71d..71b9043 100644 (file)
@@ -170,6 +170,7 @@ struct fib6_info {
                                        unused:3;
 
        struct fib6_nh                  fib6_nh;
+       struct rcu_head                 rcu;
 };
 
 struct rt6_info {
@@ -273,7 +274,7 @@ static inline void ip6_rt_put(struct rt6_info *rt)
 }
 
 struct fib6_info *fib6_info_alloc(gfp_t gfp_flags);
-void fib6_info_destroy(struct fib6_info *f6i);
+void fib6_info_destroy_rcu(struct rcu_head *head);
 
 static inline void fib6_info_hold(struct fib6_info *f6i)
 {
@@ -283,7 +284,7 @@ static inline void fib6_info_hold(struct fib6_info *f6i)
 static inline void fib6_info_release(struct fib6_info *f6i)
 {
        if (f6i && atomic_dec_and_test(&f6i->fib6_ref))
-               fib6_info_destroy(f6i);
+               call_rcu(&f6i->rcu, fib6_info_destroy_rcu);
 }
 
 enum fib6_walk_state {
index 4c6241b..6c00399 100644 (file)
@@ -3391,11 +3391,14 @@ int ib_process_cq_direct(struct ib_cq *cq, int budget);
  *
  * Users can examine the cq structure to determine the actual CQ size.
  */
-struct ib_cq *ib_create_cq(struct ib_device *device,
-                          ib_comp_handler comp_handler,
-                          void (*event_handler)(struct ib_event *, void *),
-                          void *cq_context,
-                          const struct ib_cq_init_attr *cq_attr);
+struct ib_cq *__ib_create_cq(struct ib_device *device,
+                            ib_comp_handler comp_handler,
+                            void (*event_handler)(struct ib_event *, void *),
+                            void *cq_context,
+                            const struct ib_cq_init_attr *cq_attr,
+                            const char *caller);
+#define ib_create_cq(device, cmp_hndlr, evt_hndlr, cq_ctxt, cq_attr) \
+       __ib_create_cq((device), (cmp_hndlr), (evt_hndlr), (cq_ctxt), (cq_attr), KBUILD_MODNAME)
 
 /**
  * ib_resize_cq - Modifies the capacity of the CQ.
index 85a3fb6..20d6cc9 100644 (file)
@@ -53,6 +53,9 @@ enum {
 /* These are client behavior specific flags. */
 #define NBD_CFLAG_DESTROY_ON_DISCONNECT        (1 << 0) /* delete the nbd device on
                                                    disconnect. */
+#define NBD_CFLAG_DISCONNECT_ON_CLOSE (1 << 1) /* disconnect the nbd device on
+                                               *  close by last opener.
+                                               */
 
 /* userspace doesn't need the nbd_device structure */
 
index 9d4340c..1e1d9bd 100644 (file)
@@ -25,12 +25,16 @@ extern bool xen_pvh;
 #define xen_hvm_domain()       (xen_domain_type == XEN_HVM_DOMAIN)
 #define xen_pvh_domain()       (xen_pvh)
 
+#include <linux/types.h>
+
+extern uint32_t xen_start_flags;
+
 #ifdef CONFIG_XEN_DOM0
 #include <xen/interface/xen.h>
 #include <asm/xen/hypervisor.h>
 
 #define xen_initial_domain()   (xen_domain() && \
-                                xen_start_info && xen_start_info->flags & SIF_INITDOMAIN)
+                                (xen_start_flags & SIF_INITDOMAIN))
 #else  /* !CONFIG_XEN_DOM0 */
 #define xen_initial_domain()   (0)
 #endif /* CONFIG_XEN_DOM0 */
index 5a52f07..fde3d09 100644 (file)
@@ -1719,10 +1719,6 @@ source "arch/Kconfig"
 
 endmenu                # General setup
 
-config HAVE_GENERIC_DMA_COHERENT
-       bool
-       default n
-
 config RT_MUTEXES
        bool
 
index d200162..04bc07c 100644 (file)
@@ -41,6 +41,7 @@ obj-y += printk/
 obj-y += irq/
 obj-y += rcu/
 obj-y += livepatch/
+obj-y += dma/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
index 9f14937..a9e6c04 100644 (file)
@@ -350,6 +350,20 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
        return prog_adj;
 }
 
+void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp)
+{
+       int i;
+
+       for (i = 0; i < fp->aux->func_cnt; i++)
+               bpf_prog_kallsyms_del(fp->aux->func[i]);
+}
+
+void bpf_prog_kallsyms_del_all(struct bpf_prog *fp)
+{
+       bpf_prog_kallsyms_del_subprogs(fp);
+       bpf_prog_kallsyms_del(fp);
+}
+
 #ifdef CONFIG_BPF_JIT
 /* All BPF JIT sysctl knobs here. */
 int bpf_jit_enable   __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
@@ -584,6 +598,8 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
        bpf_fill_ill_insns(hdr, size);
 
        hdr->pages = size / PAGE_SIZE;
+       hdr->locked = 0;
+
        hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
                     PAGE_SIZE - sizeof(*hdr));
        start = (get_random_int() % hole) & ~(alignment - 1);
@@ -1434,6 +1450,33 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
        return 0;
 }
 
+static int bpf_prog_check_pages_ro_locked(const struct bpf_prog *fp)
+{
+#ifdef CONFIG_ARCH_HAS_SET_MEMORY
+       int i, err;
+
+       for (i = 0; i < fp->aux->func_cnt; i++) {
+               err = bpf_prog_check_pages_ro_single(fp->aux->func[i]);
+               if (err)
+                       return err;
+       }
+
+       return bpf_prog_check_pages_ro_single(fp);
+#endif
+       return 0;
+}
+
+static void bpf_prog_select_func(struct bpf_prog *fp)
+{
+#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+
+       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
+#else
+       fp->bpf_func = __bpf_prog_ret0_warn;
+#endif
+}
+
 /**
  *     bpf_prog_select_runtime - select exec runtime for BPF program
  *     @fp: bpf_prog populated with internal BPF program
@@ -1444,13 +1487,13 @@ static int bpf_check_tail_call(const struct bpf_prog *fp)
  */
 struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
 {
-#ifndef CONFIG_BPF_JIT_ALWAYS_ON
-       u32 stack_depth = max_t(u32, fp->aux->stack_depth, 1);
+       /* In case of BPF to BPF calls, verifier did all the prep
+        * work with regards to JITing, etc.
+        */
+       if (fp->bpf_func)
+               goto finalize;
 
-       fp->bpf_func = interpreters[(round_up(stack_depth, 32) / 32) - 1];
-#else
-       fp->bpf_func = __bpf_prog_ret0_warn;
-#endif
+       bpf_prog_select_func(fp);
 
        /* eBPF JITs can rewrite the program in case constant
         * blinding is active. However, in case of error during
@@ -1471,6 +1514,8 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
                if (*err)
                        return fp;
        }
+
+finalize:
        bpf_prog_lock_ro(fp);
 
        /* The tail call compatibility check can only be done at
@@ -1479,7 +1524,17 @@ struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err)
         * all eBPF JITs might immediately support all features.
         */
        *err = bpf_check_tail_call(fp);
-
+       if (*err)
+               return fp;
+
+       /* Checkpoint: at this point onwards any cBPF -> eBPF or
+        * native eBPF program is read-only. If we failed to change
+        * the page attributes (e.g. allocation failure from
+        * splitting large pages), then reject the whole program
+        * in order to guarantee not ending up with any W+X pages
+        * from BPF side in kernel.
+        */
+       *err = bpf_prog_check_pages_ro_locked(fp);
        return fp;
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
index a7cc7b3..642c97f 100644 (file)
@@ -345,6 +345,20 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
        return bq_enqueue(dst, xdpf, dev_rx);
 }
 
+int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
+                            struct bpf_prog *xdp_prog)
+{
+       int err;
+
+       err = __xdp_generic_ok_fwd_dev(skb, dst->dev);
+       if (unlikely(err))
+               return err;
+       skb->dev = dst->dev;
+       generic_xdp_tx(skb, xdp_prog);
+
+       return 0;
+}
+
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
        struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
index 0fa2062..35dc466 100644 (file)
@@ -1034,14 +1034,9 @@ static void __bpf_prog_put_rcu(struct rcu_head *rcu)
 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock)
 {
        if (atomic_dec_and_test(&prog->aux->refcnt)) {
-               int i;
-
                /* bpf_prog_free_id() must be called first */
                bpf_prog_free_id(prog, do_idr_lock);
-
-               for (i = 0; i < prog->aux->func_cnt; i++)
-                       bpf_prog_kallsyms_del(prog->aux->func[i]);
-               bpf_prog_kallsyms_del(prog);
+               bpf_prog_kallsyms_del_all(prog);
 
                call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu);
        }
@@ -1358,9 +1353,7 @@ static int bpf_prog_load(union bpf_attr *attr)
        if (err < 0)
                goto free_used_maps;
 
-       /* eBPF program is ready to be JITed */
-       if (!prog->bpf_func)
-               prog = bpf_prog_select_runtime(prog, &err);
+       prog = bpf_prog_select_runtime(prog, &err);
        if (err < 0)
                goto free_used_maps;
 
@@ -1384,6 +1377,7 @@ static int bpf_prog_load(union bpf_attr *attr)
        return err;
 
 free_used_maps:
+       bpf_prog_kallsyms_del_subprogs(prog);
        free_used_maps(prog->aux);
 free_prog:
        bpf_prog_uncharge_memlock(prog);
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
new file mode 100644 (file)
index 0000000..9bd5430
--- /dev/null
@@ -0,0 +1,50 @@
+
+config HAS_DMA
+       bool
+       depends on !NO_DMA
+       default y
+
+config NEED_SG_DMA_LENGTH
+       bool
+
+config NEED_DMA_MAP_STATE
+       bool
+
+config ARCH_DMA_ADDR_T_64BIT
+       def_bool 64BIT || PHYS_ADDR_T_64BIT
+
+config HAVE_GENERIC_DMA_COHERENT
+       bool
+
+config ARCH_HAS_SYNC_DMA_FOR_DEVICE
+       bool
+
+config ARCH_HAS_SYNC_DMA_FOR_CPU
+       bool
+       select NEED_DMA_MAP_STATE
+
+config DMA_DIRECT_OPS
+       bool
+       depends on HAS_DMA
+
+config DMA_NONCOHERENT_OPS
+       bool
+       depends on HAS_DMA
+       select DMA_DIRECT_OPS
+
+config DMA_NONCOHERENT_MMAP
+       bool
+       depends on DMA_NONCOHERENT_OPS
+
+config DMA_NONCOHERENT_CACHE_SYNC
+       bool
+       depends on DMA_NONCOHERENT_OPS
+
+config DMA_VIRT_OPS
+       bool
+       depends on HAS_DMA
+
+config SWIOTLB
+       bool
+       select DMA_DIRECT_OPS
+       select NEED_DMA_MAP_STATE
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
new file mode 100644 (file)
index 0000000..6de44e4
--- /dev/null
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_HAS_DMA)                  += mapping.o
+obj-$(CONFIG_DMA_CMA)                  += contiguous.o
+obj-$(CONFIG_HAVE_GENERIC_DMA_COHERENT) += coherent.o
+obj-$(CONFIG_DMA_DIRECT_OPS)           += direct.o
+obj-$(CONFIG_DMA_NONCOHERENT_OPS)      += noncoherent.o
+obj-$(CONFIG_DMA_VIRT_OPS)             += virt.o
+obj-$(CONFIG_DMA_API_DEBUG)            += debug.o
+obj-$(CONFIG_SWIOTLB)                  += swiotlb.o
+
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
new file mode 100644 (file)
index 0000000..597d408
--- /dev/null
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Coherent per-device memory handling.
+ * Borrowed from i386
+ */
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+
+struct dma_coherent_mem {
+       void            *virt_base;
+       dma_addr_t      device_base;
+       unsigned long   pfn_base;
+       int             size;
+       int             flags;
+       unsigned long   *bitmap;
+       spinlock_t      spinlock;
+       bool            use_dev_dma_pfn_offset;
+};
+
+static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
+
+static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev)
+{
+       if (dev && dev->dma_mem)
+               return dev->dma_mem;
+       return NULL;
+}
+
+static inline dma_addr_t dma_get_device_base(struct device *dev,
+                                            struct dma_coherent_mem * mem)
+{
+       if (mem->use_dev_dma_pfn_offset)
+               return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
+       else
+               return mem->device_base;
+}
+
+static int dma_init_coherent_memory(
+       phys_addr_t phys_addr, dma_addr_t device_addr, size_t size, int flags,
+       struct dma_coherent_mem **mem)
+{
+       struct dma_coherent_mem *dma_mem = NULL;
+       void __iomem *mem_base = NULL;
+       int pages = size >> PAGE_SHIFT;
+       int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
+       int ret;
+
+       if (!size) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       mem_base = memremap(phys_addr, size, MEMREMAP_WC);
+       if (!mem_base) {
+               ret = -EINVAL;
+               goto out;
+       }
+       dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
+       if (!dma_mem) {
+               ret = -ENOMEM;
+               goto out;
+       }
+       dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
+       if (!dma_mem->bitmap) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       dma_mem->virt_base = mem_base;
+       dma_mem->device_base = device_addr;
+       dma_mem->pfn_base = PFN_DOWN(phys_addr);
+       dma_mem->size = pages;
+       dma_mem->flags = flags;
+       spin_lock_init(&dma_mem->spinlock);
+
+       *mem = dma_mem;
+       return 0;
+
+out:
+       kfree(dma_mem);
+       if (mem_base)
+               memunmap(mem_base);
+       return ret;
+}
+
+static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
+{
+       if (!mem)
+               return;
+
+       memunmap(mem->virt_base);
+       kfree(mem->bitmap);
+       kfree(mem);
+}
+
+static int dma_assign_coherent_memory(struct device *dev,
+                                     struct dma_coherent_mem *mem)
+{
+       if (!dev)
+               return -ENODEV;
+
+       if (dev->dma_mem)
+               return -EBUSY;
+
+       dev->dma_mem = mem;
+       return 0;
+}
+
+int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+                               dma_addr_t device_addr, size_t size, int flags)
+{
+       struct dma_coherent_mem *mem;
+       int ret;
+
+       ret = dma_init_coherent_memory(phys_addr, device_addr, size, flags, &mem);
+       if (ret)
+               return ret;
+
+       ret = dma_assign_coherent_memory(dev, mem);
+       if (ret)
+               dma_release_coherent_memory(mem);
+       return ret;
+}
+EXPORT_SYMBOL(dma_declare_coherent_memory);
+
+void dma_release_declared_memory(struct device *dev)
+{
+       struct dma_coherent_mem *mem = dev->dma_mem;
+
+       if (!mem)
+               return;
+       dma_release_coherent_memory(mem);
+       dev->dma_mem = NULL;
+}
+EXPORT_SYMBOL(dma_release_declared_memory);
+
+void *dma_mark_declared_memory_occupied(struct device *dev,
+                                       dma_addr_t device_addr, size_t size)
+{
+       struct dma_coherent_mem *mem = dev->dma_mem;
+       unsigned long flags;
+       int pos, err;
+
+       size += device_addr & ~PAGE_MASK;
+
+       if (!mem)
+               return ERR_PTR(-EINVAL);
+
+       spin_lock_irqsave(&mem->spinlock, flags);
+       pos = PFN_DOWN(device_addr - dma_get_device_base(dev, mem));
+       err = bitmap_allocate_region(mem->bitmap, pos, get_order(size));
+       spin_unlock_irqrestore(&mem->spinlock, flags);
+
+       if (err != 0)
+               return ERR_PTR(err);
+       return mem->virt_base + (pos << PAGE_SHIFT);
+}
+EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
+
+static void *__dma_alloc_from_coherent(struct dma_coherent_mem *mem,
+               ssize_t size, dma_addr_t *dma_handle)
+{
+       int order = get_order(size);
+       unsigned long flags;
+       int pageno;
+       void *ret;
+
+       spin_lock_irqsave(&mem->spinlock, flags);
+
+       if (unlikely(size > (mem->size << PAGE_SHIFT)))
+               goto err;
+
+       pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
+       if (unlikely(pageno < 0))
+               goto err;
+
+       /*
+        * Memory was found in the coherent area.
+        */
+       *dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+       ret = mem->virt_base + (pageno << PAGE_SHIFT);
+       spin_unlock_irqrestore(&mem->spinlock, flags);
+       memset(ret, 0, size);
+       return ret;
+err:
+       spin_unlock_irqrestore(&mem->spinlock, flags);
+       return NULL;
+}
+
+/**
+ * dma_alloc_from_dev_coherent() - allocate memory from device coherent pool
+ * @dev:       device from which we allocate memory
+ * @size:      size of requested memory area
+ * @dma_handle:        This will be filled with the correct dma handle
+ * @ret:       This pointer will be filled with the virtual address
+ *             to allocated area.
+ *
+ * This function should be only called from per-arch dma_alloc_coherent()
+ * to support allocation from per-device coherent memory pools.
+ *
+ * Returns 0 if dma_alloc_coherent should continue with allocating from
+ * generic memory areas, or !0 if dma_alloc_coherent should return @ret.
+ */
+int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
+               dma_addr_t *dma_handle, void **ret)
+{
+       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+
+       if (!mem)
+               return 0;
+
+       *ret = __dma_alloc_from_coherent(mem, size, dma_handle);
+       if (*ret)
+               return 1;
+
+       /*
+        * In the case where the allocation can not be satisfied from the
+        * per-device area, try to fall back to generic memory if the
+        * constraints allow it.
+        */
+       return mem->flags & DMA_MEMORY_EXCLUSIVE;
+}
+EXPORT_SYMBOL(dma_alloc_from_dev_coherent);
+
+void *dma_alloc_from_global_coherent(ssize_t size, dma_addr_t *dma_handle)
+{
+       if (!dma_coherent_default_memory)
+               return NULL;
+
+       return __dma_alloc_from_coherent(dma_coherent_default_memory, size,
+                       dma_handle);
+}
+
+static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
+                                      int order, void *vaddr)
+{
+       if (mem && vaddr >= mem->virt_base && vaddr <
+                  (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+               int page = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+               unsigned long flags;
+
+               spin_lock_irqsave(&mem->spinlock, flags);
+               bitmap_release_region(mem->bitmap, page, order);
+               spin_unlock_irqrestore(&mem->spinlock, flags);
+               return 1;
+       }
+       return 0;
+}
+
+/**
+ * dma_release_from_dev_coherent() - free memory to device coherent memory pool
+ * @dev:       device from which the memory was allocated
+ * @order:     the order of pages allocated
+ * @vaddr:     virtual address of allocated pages
+ *
+ * This checks whether the memory was allocated from the per-device
+ * coherent memory pool and if so, releases that memory.
+ *
+ * Returns 1 if we correctly released the memory, or 0 if the caller should
+ * proceed with releasing memory from generic pools.
+ */
+int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
+{
+       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+
+       return __dma_release_from_coherent(mem, order, vaddr);
+}
+EXPORT_SYMBOL(dma_release_from_dev_coherent);
+
+int dma_release_from_global_coherent(int order, void *vaddr)
+{
+       if (!dma_coherent_default_memory)
+               return 0;
+
+       return __dma_release_from_coherent(dma_coherent_default_memory, order,
+                       vaddr);
+}
+
+static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
+               struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
+{
+       if (mem && vaddr >= mem->virt_base && vaddr + size <=
+                  (mem->virt_base + (mem->size << PAGE_SHIFT))) {
+               unsigned long off = vma->vm_pgoff;
+               int start = (vaddr - mem->virt_base) >> PAGE_SHIFT;
+               int user_count = vma_pages(vma);
+               int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+               *ret = -ENXIO;
+               if (off < count && user_count <= count - off) {
+                       unsigned long pfn = mem->pfn_base + start + off;
+                       *ret = remap_pfn_range(vma, vma->vm_start, pfn,
+                                              user_count << PAGE_SHIFT,
+                                              vma->vm_page_prot);
+               }
+               return 1;
+       }
+       return 0;
+}
+
+/**
+ * dma_mmap_from_dev_coherent() - mmap memory from the device coherent pool
+ * @dev:       device from which the memory was allocated
+ * @vma:       vm_area for the userspace memory
+ * @vaddr:     cpu address returned by dma_alloc_from_dev_coherent
+ * @size:      size of the memory buffer allocated
+ * @ret:       result from remap_pfn_range()
+ *
+ * This checks whether the memory was allocated from the per-device
+ * coherent memory pool and if so, maps that memory to the provided vma.
+ *
+ * Returns 1 if @vaddr belongs to the device coherent pool and the caller
+ * should return @ret, or 0 if they should proceed with mapping memory from
+ * generic areas.
+ */
+int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
+                          void *vaddr, size_t size, int *ret)
+{
+       struct dma_coherent_mem *mem = dev_get_coherent_memory(dev);
+
+       return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
+}
+EXPORT_SYMBOL(dma_mmap_from_dev_coherent);
+
+int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
+                                  size_t size, int *ret)
+{
+       if (!dma_coherent_default_memory)
+               return 0;
+
+       return __dma_mmap_from_coherent(dma_coherent_default_memory, vma,
+                                       vaddr, size, ret);
+}
+
+/*
+ * Support for reserved memory regions defined in device tree
+ */
+#ifdef CONFIG_OF_RESERVED_MEM
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+
+static struct reserved_mem *dma_reserved_default_memory __initdata;
+
+static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
+{
+       struct dma_coherent_mem *mem = rmem->priv;
+       int ret;
+
+       if (!mem) {
+               ret = dma_init_coherent_memory(rmem->base, rmem->base,
+                                              rmem->size,
+                                              DMA_MEMORY_EXCLUSIVE, &mem);
+               if (ret) {
+                       pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
+                               &rmem->base, (unsigned long)rmem->size / SZ_1M);
+                       return ret;
+               }
+       }
+       mem->use_dev_dma_pfn_offset = true;
+       rmem->priv = mem;
+       dma_assign_coherent_memory(dev, mem);
+       return 0;
+}
+
+static void rmem_dma_device_release(struct reserved_mem *rmem,
+                                   struct device *dev)
+{
+       if (dev)
+               dev->dma_mem = NULL;
+}
+
+static const struct reserved_mem_ops rmem_dma_ops = {
+       .device_init    = rmem_dma_device_init,
+       .device_release = rmem_dma_device_release,
+};
+
+static int __init rmem_dma_setup(struct reserved_mem *rmem)
+{
+       unsigned long node = rmem->fdt_node;
+
+       if (of_get_flat_dt_prop(node, "reusable", NULL))
+               return -EINVAL;
+
+#ifdef CONFIG_ARM
+       if (!of_get_flat_dt_prop(node, "no-map", NULL)) {
+               pr_err("Reserved memory: regions without no-map are not yet supported\n");
+               return -EINVAL;
+       }
+
+       if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) {
+               WARN(dma_reserved_default_memory,
+                    "Reserved memory: region for default DMA coherent area is redefined\n");
+               dma_reserved_default_memory = rmem;
+       }
+#endif
+
+       rmem->ops = &rmem_dma_ops;
+       pr_info("Reserved memory: created DMA memory pool at %pa, size %ld MiB\n",
+               &rmem->base, (unsigned long)rmem->size / SZ_1M);
+       return 0;
+}
+
+static int __init dma_init_reserved_memory(void)
+{
+       const struct reserved_mem_ops *ops;
+       int ret;
+
+       if (!dma_reserved_default_memory)
+               return -ENOMEM;
+
+       ops = dma_reserved_default_memory->ops;
+
+       /*
+        * We rely on rmem_dma_device_init() does not propagate error of
+        * dma_assign_coherent_memory() for "NULL" device.
+        */
+       ret = ops->device_init(dma_reserved_default_memory, NULL);
+
+       if (!ret) {
+               dma_coherent_default_memory = dma_reserved_default_memory->priv;
+               pr_info("DMA: default coherent area is set\n");
+       }
+
+       return ret;
+}
+
+core_initcall(dma_init_reserved_memory);
+
+RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
+#endif
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
new file mode 100644 (file)
index 0000000..d987dcd
--- /dev/null
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Contiguous Memory Allocator for DMA mapping framework
+ * Copyright (c) 2010-2011 by Samsung Electronics.
+ * Written by:
+ *     Marek Szyprowski <m.szyprowski@samsung.com>
+ *     Michal Nazarewicz <mina86@mina86.com>
+ */
+
+#define pr_fmt(fmt) "cma: " fmt
+
+#ifdef CONFIG_CMA_DEBUG
+#ifndef DEBUG
+#  define DEBUG
+#endif
+#endif
+
+#include <asm/page.h>
+#include <asm/dma-contiguous.h>
+
+#include <linux/memblock.h>
+#include <linux/err.h>
+#include <linux/sizes.h>
+#include <linux/dma-contiguous.h>
+#include <linux/cma.h>
+
+#ifdef CONFIG_CMA_SIZE_MBYTES
+#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
+#else
+#define CMA_SIZE_MBYTES 0
+#endif
+
+struct cma *dma_contiguous_default_area;
+
+/*
+ * Default global CMA area size can be defined in kernel's .config.
+ * This is useful mainly for distro maintainers to create a kernel
+ * that works correctly for most supported systems.
+ * The size can be set in bytes or as a percentage of the total memory
+ * in the system.
+ *
+ * Users, who want to set the size of global CMA area for their system
+ * should use cma= kernel parameter.
+ */
+static const phys_addr_t size_bytes = (phys_addr_t)CMA_SIZE_MBYTES * SZ_1M;
+static phys_addr_t size_cmdline = -1;
+static phys_addr_t base_cmdline;
+static phys_addr_t limit_cmdline;
+
+static int __init early_cma(char *p)
+{
+       pr_debug("%s(%s)\n", __func__, p);
+       size_cmdline = memparse(p, &p);
+       if (*p != '@')
+               return 0;
+       base_cmdline = memparse(p + 1, &p);
+       if (*p != '-') {
+               limit_cmdline = base_cmdline + size_cmdline;
+               return 0;
+       }
+       limit_cmdline = memparse(p + 1, &p);
+
+       return 0;
+}
+early_param("cma", early_cma);
+
+#ifdef CONFIG_CMA_SIZE_PERCENTAGE
+
+static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
+{
+       struct memblock_region *reg;
+       unsigned long total_pages = 0;
+
+       /*
+        * We cannot use memblock_phys_mem_size() here, because
+        * memblock_analyze() has not been called yet.
+        */
+       for_each_memblock(memory, reg)
+               total_pages += memblock_region_memory_end_pfn(reg) -
+                              memblock_region_memory_base_pfn(reg);
+
+       return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
+}
+
+#else
+
+static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
+{
+       return 0;
+}
+
+#endif
+
+/**
+ * dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory.
+ */
+void __init dma_contiguous_reserve(phys_addr_t limit)
+{
+       phys_addr_t selected_size = 0;
+       phys_addr_t selected_base = 0;
+       phys_addr_t selected_limit = limit;
+       bool fixed = false;
+
+       pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
+
+       if (size_cmdline != -1) {
+               selected_size = size_cmdline;
+               selected_base = base_cmdline;
+               selected_limit = min_not_zero(limit_cmdline, limit);
+               if (base_cmdline + size_cmdline == limit_cmdline)
+                       fixed = true;
+       } else {
+#ifdef CONFIG_CMA_SIZE_SEL_MBYTES
+               selected_size = size_bytes;
+#elif defined(CONFIG_CMA_SIZE_SEL_PERCENTAGE)
+               selected_size = cma_early_percent_memory();
+#elif defined(CONFIG_CMA_SIZE_SEL_MIN)
+               selected_size = min(size_bytes, cma_early_percent_memory());
+#elif defined(CONFIG_CMA_SIZE_SEL_MAX)
+               selected_size = max(size_bytes, cma_early_percent_memory());
+#endif
+       }
+
+       if (selected_size && !dma_contiguous_default_area) {
+               pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+                        (unsigned long)selected_size / SZ_1M);
+
+               dma_contiguous_reserve_area(selected_size, selected_base,
+                                           selected_limit,
+                                           &dma_contiguous_default_area,
+                                           fixed);
+       }
+}
+
+/**
+ * dma_contiguous_reserve_area() - reserve custom contiguous area
+ * @size: Size of the reserved area (in bytes),
+ * @base: Base address of the reserved area optional, use 0 for any
+ * @limit: End address of the reserved memory (optional, 0 for any).
+ * @res_cma: Pointer to store the created cma region.
+ * @fixed: hint about where to place the reserved area
+ *
+ * This function reserves memory from early allocator. It should be
+ * called by arch specific code once the early allocator (memblock or bootmem)
+ * has been activated and all other subsystems have already allocated/reserved
+ * memory. This function allows to create custom reserved areas for specific
+ * devices.
+ *
+ * If @fixed is true, reserve contiguous area at exactly @base.  If false,
+ * reserve in range from @base to @limit.
+ */
+int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base,
+                                      phys_addr_t limit, struct cma **res_cma,
+                                      bool fixed)
+{
+       int ret;
+
+       ret = cma_declare_contiguous(base, size, limit, 0, 0, fixed,
+                                       "reserved", res_cma);
+       if (ret)
+               return ret;
+
+       /* Architecture specific contiguous memory fixup. */
+       dma_contiguous_early_fixup(cma_get_base(*res_cma),
+                               cma_get_size(*res_cma));
+
+       return 0;
+}
+
+/**
+ * dma_alloc_from_contiguous() - allocate pages from contiguous area
+ * @dev:   Pointer to device for which the allocation is performed.
+ * @count: Requested number of pages.
+ * @align: Requested alignment of pages (in PAGE_SIZE order).
+ * @gfp_mask: GFP flags to use for this allocation.
+ *
+ * This function allocates memory buffer for specified device. It uses
+ * device specific contiguous memory area if available or the default
+ * global one. Requires architecture specific dev_get_cma_area() helper
+ * function.
+ */
+struct page *dma_alloc_from_contiguous(struct device *dev, size_t count,
+                                      unsigned int align, gfp_t gfp_mask)
+{
+       if (align > CONFIG_CMA_ALIGNMENT)
+               align = CONFIG_CMA_ALIGNMENT;
+
+       return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask);
+}
+
+/**
+ * dma_release_from_contiguous() - release allocated pages
+ * @dev:   Pointer to device for which the pages were allocated.
+ * @pages: Allocated pages.
+ * @count: Number of allocated pages.
+ *
+ * This function releases memory allocated by dma_alloc_from_contiguous().
+ * It returns false when provided pages do not belong to contiguous area and
+ * true otherwise.
+ */
+bool dma_release_from_contiguous(struct device *dev, struct page *pages,
+                                int count)
+{
+       return cma_release(dev_get_cma_area(dev), pages, count);
+}
+
+/*
+ * Support for reserved memory regions defined in device tree
+ */
+#ifdef CONFIG_OF_RESERVED_MEM
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+
+#undef pr_fmt
+#define pr_fmt(fmt) fmt
+
+static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
+{
+       dev_set_cma_area(dev, rmem->priv);
+       return 0;
+}
+
+static void rmem_cma_device_release(struct reserved_mem *rmem,
+                                   struct device *dev)
+{
+       dev_set_cma_area(dev, NULL);
+}
+
+static const struct reserved_mem_ops rmem_cma_ops = {
+       .device_init    = rmem_cma_device_init,
+       .device_release = rmem_cma_device_release,
+};
+
+static int __init rmem_cma_setup(struct reserved_mem *rmem)
+{
+       phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
+       phys_addr_t mask = align - 1;
+       unsigned long node = rmem->fdt_node;
+       struct cma *cma;
+       int err;
+
+       if (!of_get_flat_dt_prop(node, "reusable", NULL) ||
+           of_get_flat_dt_prop(node, "no-map", NULL))
+               return -EINVAL;
+
+       if ((rmem->base & mask) || (rmem->size & mask)) {
+               pr_err("Reserved memory: incorrect alignment of CMA region\n");
+               return -EINVAL;
+       }
+
+       err = cma_init_reserved_mem(rmem->base, rmem->size, 0, rmem->name, &cma);
+       if (err) {
+               pr_err("Reserved memory: unable to setup CMA region\n");
+               return err;
+       }
+       /* Architecture specific contiguous memory fixup. */
+       dma_contiguous_early_fixup(rmem->base, rmem->size);
+
+       if (of_get_flat_dt_prop(node, "linux,cma-default", NULL))
+               dma_contiguous_set_default(cma);
+
+       rmem->ops = &rmem_cma_ops;
+       rmem->priv = cma;
+
+       pr_info("Reserved memory: created CMA memory pool at %pa, size %ld MiB\n",
+               &rmem->base, (unsigned long)rmem->size / SZ_1M);
+
+       return 0;
+}
+RESERVEDMEM_OF_DECLARE(cma, "shared-dma-pool", rmem_cma_setup);
+#endif
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
new file mode 100644 (file)
index 0000000..c007d25
--- /dev/null
@@ -0,0 +1,1773 @@
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ */
+
+#include <linux/sched/task_stack.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-mapping.h>
+#include <linux/sched/task.h>
+#include <linux/stacktrace.h>
+#include <linux/dma-debug.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include <asm/sections.h>
+
+#define HASH_SIZE       1024ULL
+#define HASH_FN_SHIFT   13
+#define HASH_FN_MASK    (HASH_SIZE - 1)
+
+/* allow architectures to override this if absolutely required */
+#ifndef PREALLOC_DMA_DEBUG_ENTRIES
+#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
+#endif
+
+enum {
+       dma_debug_single,
+       dma_debug_page,
+       dma_debug_sg,
+       dma_debug_coherent,
+       dma_debug_resource,
+};
+
+enum map_err_types {
+       MAP_ERR_CHECK_NOT_APPLICABLE,
+       MAP_ERR_NOT_CHECKED,
+       MAP_ERR_CHECKED,
+};
+
+#define DMA_DEBUG_STACKTRACE_ENTRIES 5
+
+/**
+ * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
+ * @list: node on pre-allocated free_entries list
+ * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @type: single, page, sg, coherent
+ * @pfn: page frame of the start address
+ * @offset: offset of mapping relative to pfn
+ * @size: length of the mapping
+ * @direction: enum dma_data_direction
+ * @sg_call_ents: 'nents' from dma_map_sg
+ * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
+ * @map_err_type: track whether dma_mapping_error() was checked
+ * @stacktrace: support backtraces when a violation is detected
+ */
+struct dma_debug_entry {
+       struct list_head list;
+       struct device    *dev;
+       int              type;
+       unsigned long    pfn;
+       size_t           offset;
+       u64              dev_addr;
+       u64              size;
+       int              direction;
+       int              sg_call_ents;
+       int              sg_mapped_ents;
+       enum map_err_types  map_err_type;
+#ifdef CONFIG_STACKTRACE
+       struct           stack_trace stacktrace;
+       unsigned long    st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
+#endif
+};
+
+typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
+
+struct hash_bucket {
+       struct list_head list;
+       spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
+/* Hash list to save the allocated dma addresses */
+static struct hash_bucket dma_entry_hash[HASH_SIZE];
+/* List of pre-allocated dma_debug_entry's */
+static LIST_HEAD(free_entries);
+/* Lock for the list above */
+static DEFINE_SPINLOCK(free_entries_lock);
+
+/* Global disable flag - will be set in case of an error */
+static bool global_disable __read_mostly;
+
+/* Early initialization disable flag, set at the end of dma_debug_init */
+static bool dma_debug_initialized __read_mostly;
+
+static inline bool dma_debug_disabled(void)
+{
+       return global_disable || !dma_debug_initialized;
+}
+
+/* Global error count */
+static u32 error_count;
+
+/* Global error show enable*/
+static u32 show_all_errors __read_mostly;
+/* Number of errors to show */
+static u32 show_num_errors = 1;
+
+static u32 num_free_entries;
+static u32 min_free_entries;
+static u32 nr_total_entries;
+
+/* number of preallocated entries requested by kernel cmdline */
+static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
+
+/* debugfs dentry's for the stuff above */
+static struct dentry *dma_debug_dent        __read_mostly;
+static struct dentry *global_disable_dent   __read_mostly;
+static struct dentry *error_count_dent      __read_mostly;
+static struct dentry *show_all_errors_dent  __read_mostly;
+static struct dentry *show_num_errors_dent  __read_mostly;
+static struct dentry *num_free_entries_dent __read_mostly;
+static struct dentry *min_free_entries_dent __read_mostly;
+static struct dentry *filter_dent           __read_mostly;
+
+/* per-driver filter related state */
+
+#define NAME_MAX_LEN   64
+
+static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
+static struct device_driver *current_driver                    __read_mostly;
+
+static DEFINE_RWLOCK(driver_name_lock);
+
+static const char *const maperr2str[] = {
+       [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
+       [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
+       [MAP_ERR_CHECKED] = "dma map error checked",
+};
+
+static const char *type2name[5] = { "single", "page",
+                                   "scather-gather", "coherent",
+                                   "resource" };
+
+static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
+                                  "DMA_FROM_DEVICE", "DMA_NONE" };
+
+/*
+ * The access to some variables in this macro is racy. We can't use atomic_t
+ * here because all these variables are exported to debugfs. Some of them even
+ * writeable. This is also the reason why a lock won't help much. But anyway,
+ * the races are no big deal. Here is why:
+ *
+ *   error_count: the addition is racy, but the worst thing that can happen is
+ *                that we don't count some errors
+ *   show_num_errors: the subtraction is racy. Also no big deal because in
+ *                    worst case this will result in one warning more in the
+ *                    system log than the user configured. This variable is
+ *                    writeable via debugfs.
+ */
+static inline void dump_entry_trace(struct dma_debug_entry *entry)
+{
+#ifdef CONFIG_STACKTRACE
+       if (entry) {
+               pr_warning("Mapped at:\n");
+               print_stack_trace(&entry->stacktrace, 0);
+       }
+#endif
+}
+
+static bool driver_filter(struct device *dev)
+{
+       struct device_driver *drv;
+       unsigned long flags;
+       bool ret;
+
+       /* driver filter off */
+       if (likely(!current_driver_name[0]))
+               return true;
+
+       /* driver filter on and initialized */
+       if (current_driver && dev && dev->driver == current_driver)
+               return true;
+
+       /* driver filter on, but we can't filter on a NULL device... */
+       if (!dev)
+               return false;
+
+       if (current_driver || !current_driver_name[0])
+               return false;
+
+       /* driver filter on but not yet initialized */
+       drv = dev->driver;
+       if (!drv)
+               return false;
+
+       /* lock to protect against change of current_driver_name */
+       read_lock_irqsave(&driver_name_lock, flags);
+
+       ret = false;
+       if (drv->name &&
+           strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
+               current_driver = drv;
+               ret = true;
+       }
+
+       read_unlock_irqrestore(&driver_name_lock, flags);
+
+       return ret;
+}
+
+#define err_printk(dev, entry, format, arg...) do {                    \
+               error_count += 1;                                       \
+               if (driver_filter(dev) &&                               \
+                   (show_all_errors || show_num_errors > 0)) {         \
+                       WARN(1, "%s %s: " format,                       \
+                            dev ? dev_driver_string(dev) : "NULL",     \
+                            dev ? dev_name(dev) : "NULL", ## arg);     \
+                       dump_entry_trace(entry);                        \
+               }                                                       \
+               if (!show_all_errors && show_num_errors > 0)            \
+                       show_num_errors -= 1;                           \
+       } while (0);
+
+/*
+ * Hash related functions
+ *
+ * Every DMA-API request is saved into a struct dma_debug_entry. To
+ * have quick access to these structs they are stored into a hash.
+ */
+static int hash_fn(struct dma_debug_entry *entry)
+{
+       /*
+        * Hash function is based on the dma address.
+        * We use bits 20-27 here as the index into the hash
+        */
+       return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
+}
+
+/*
+ * Request exclusive access to a hash bucket for a given dma_debug_entry.
+ */
+static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
+                                          unsigned long *flags)
+       __acquires(&dma_entry_hash[idx].lock)
+{
+       int idx = hash_fn(entry);
+       unsigned long __flags;
+
+       spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
+       *flags = __flags;
+       return &dma_entry_hash[idx];
+}
+
+/*
+ * Give up exclusive access to the hash bucket
+ */
+static void put_hash_bucket(struct hash_bucket *bucket,
+                           unsigned long *flags)
+       __releases(&bucket->lock)
+{
+       unsigned long __flags = *flags;
+
+       spin_unlock_irqrestore(&bucket->lock, __flags);
+}
+
+static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
+{
+       return ((a->dev_addr == b->dev_addr) &&
+               (a->dev == b->dev)) ? true : false;
+}
+
+static bool containing_match(struct dma_debug_entry *a,
+                            struct dma_debug_entry *b)
+{
+       if (a->dev != b->dev)
+               return false;
+
+       if ((b->dev_addr <= a->dev_addr) &&
+           ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
+               return true;
+
+       return false;
+}
+
+/*
+ * Search a given entry in the hash bucket list
+ */
+static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
+                                                 struct dma_debug_entry *ref,
+                                                 match_fn match)
+{
+       struct dma_debug_entry *entry, *ret = NULL;
+       int matches = 0, match_lvl, last_lvl = -1;
+
+       list_for_each_entry(entry, &bucket->list, list) {
+               if (!match(ref, entry))
+                       continue;
+
+               /*
+                * Some drivers map the same physical address multiple
+                * times. Without a hardware IOMMU this results in the
+                * same device addresses being put into the dma-debug
+                * hash multiple times too. This can result in false
+                * positives being reported. Therefore we implement a
+                * best-fit algorithm here which returns the entry from
+                * the hash which fits best to the reference value
+                * instead of the first-fit.
+                */
+               matches += 1;
+               match_lvl = 0;
+               entry->size         == ref->size         ? ++match_lvl : 0;
+               entry->type         == ref->type         ? ++match_lvl : 0;
+               entry->direction    == ref->direction    ? ++match_lvl : 0;
+               entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
+
+               if (match_lvl == 4) {
+                       /* perfect-fit - return the result */
+                       return entry;
+               } else if (match_lvl > last_lvl) {
+                       /*
+                        * We found an entry that fits better then the
+                        * previous one or it is the 1st match.
+                        */
+                       last_lvl = match_lvl;
+                       ret      = entry;
+               }
+       }
+
+       /*
+        * If we have multiple matches but no perfect-fit, just return
+        * NULL.
+        */
+       ret = (matches == 1) ? ret : NULL;
+
+       return ret;
+}
+
+static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
+                                                struct dma_debug_entry *ref)
+{
+       return __hash_bucket_find(bucket, ref, exact_match);
+}
+
+static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
+                                                  struct dma_debug_entry *ref,
+                                                  unsigned long *flags)
+{
+
+       unsigned int max_range = dma_get_max_seg_size(ref->dev);
+       struct dma_debug_entry *entry, index = *ref;
+       unsigned int range = 0;
+
+       while (range <= max_range) {
+               entry = __hash_bucket_find(*bucket, ref, containing_match);
+
+               if (entry)
+                       return entry;
+
+               /*
+                * Nothing found, go back a hash bucket
+                */
+               put_hash_bucket(*bucket, flags);
+               range          += (1 << HASH_FN_SHIFT);
+               index.dev_addr -= (1 << HASH_FN_SHIFT);
+               *bucket = get_hash_bucket(&index, flags);
+       }
+
+       return NULL;
+}
+
+/*
+ * Add an entry to a hash bucket
+ */
+static void hash_bucket_add(struct hash_bucket *bucket,
+                           struct dma_debug_entry *entry)
+{
+       list_add_tail(&entry->list, &bucket->list);
+}
+
+/*
+ * Remove entry from a hash bucket list
+ */
+static void hash_bucket_del(struct dma_debug_entry *entry)
+{
+       list_del(&entry->list);
+}
+
+static unsigned long long phys_addr(struct dma_debug_entry *entry)
+{
+       if (entry->type == dma_debug_resource)
+               return __pfn_to_phys(entry->pfn) + entry->offset;
+
+       return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
+}
+
+/*
+ * Dump mapping entries for debugging purposes
+ */
+void debug_dma_dump_mappings(struct device *dev)
+{
+       int idx;
+
+       for (idx = 0; idx < HASH_SIZE; idx++) {
+               struct hash_bucket *bucket = &dma_entry_hash[idx];
+               struct dma_debug_entry *entry;
+               unsigned long flags;
+
+               spin_lock_irqsave(&bucket->lock, flags);
+
+               list_for_each_entry(entry, &bucket->list, list) {
+                       if (!dev || dev == entry->dev) {
+                               dev_info(entry->dev,
+                                        "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
+                                        type2name[entry->type], idx,
+                                        phys_addr(entry), entry->pfn,
+                                        entry->dev_addr, entry->size,
+                                        dir2name[entry->direction],
+                                        maperr2str[entry->map_err_type]);
+                       }
+               }
+
+               spin_unlock_irqrestore(&bucket->lock, flags);
+       }
+}
+
+/*
+ * For each mapping (initial cacheline in the case of
+ * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
+ * scatterlist, or the cacheline specified in dma_map_single) insert
+ * into this tree using the cacheline as the key. At
+ * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
+ * the entry already exists at insertion time add a tag as a reference
+ * count for the overlapping mappings.  For now, the overlap tracking
+ * just ensures that 'unmaps' balance 'maps' before marking the
+ * cacheline idle, but we should also be flagging overlaps as an API
+ * violation.
+ *
+ * Memory usage is mostly constrained by the maximum number of available
+ * dma-debug entries in that we need a free dma_debug_entry before
+ * inserting into the tree.  In the case of dma_map_page and
+ * dma_alloc_coherent there is only one dma_debug_entry and one
+ * dma_active_cacheline entry to track per event.  dma_map_sg(), on the
+ * other hand, consumes a single dma_debug_entry, but inserts 'nents'
+ * entries into the tree.
+ *
+ * At any time debug_dma_assert_idle() can be called to trigger a
+ * warning if any cachelines in the given page are in the active set.
+ */
+static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
+static DEFINE_SPINLOCK(radix_lock);
+#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
+#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
+#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
+
+static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
+{
+       return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
+               (entry->offset >> L1_CACHE_SHIFT);
+}
+
+static int active_cacheline_read_overlap(phys_addr_t cln)
+{
+       int overlap = 0, i;
+
+       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+               if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
+                       overlap |= 1 << i;
+       return overlap;
+}
+
+static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
+{
+       int i;
+
+       if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
+               return overlap;
+
+       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
+               if (overlap & 1 << i)
+                       radix_tree_tag_set(&dma_active_cacheline, cln, i);
+               else
+                       radix_tree_tag_clear(&dma_active_cacheline, cln, i);
+
+       return overlap;
+}
+
+static void active_cacheline_inc_overlap(phys_addr_t cln)
+{
+       int overlap = active_cacheline_read_overlap(cln);
+
+       overlap = active_cacheline_set_overlap(cln, ++overlap);
+
+       /* If we overflowed the overlap counter then we're potentially
+        * leaking dma-mappings.  Otherwise, if maps and unmaps are
+        * balanced then this overflow may cause false negatives in
+        * debug_dma_assert_idle() as the cacheline may be marked idle
+        * prematurely.
+        */
+       WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
+                 "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
+                 ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
+}
+
+static int active_cacheline_dec_overlap(phys_addr_t cln)
+{
+       int overlap = active_cacheline_read_overlap(cln);
+
+       return active_cacheline_set_overlap(cln, --overlap);
+}
+
+static int active_cacheline_insert(struct dma_debug_entry *entry)
+{
+       phys_addr_t cln = to_cacheline_number(entry);
+       unsigned long flags;
+       int rc;
+
+       /* If the device is not writing memory then we don't have any
+        * concerns about the cpu consuming stale data.  This mitigates
+        * legitimate usages of overlapping mappings.
+        */
+       if (entry->direction == DMA_TO_DEVICE)
+               return 0;
+
+       spin_lock_irqsave(&radix_lock, flags);
+       rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
+       if (rc == -EEXIST)
+               active_cacheline_inc_overlap(cln);
+       spin_unlock_irqrestore(&radix_lock, flags);
+
+       return rc;
+}
+
+static void active_cacheline_remove(struct dma_debug_entry *entry)
+{
+       phys_addr_t cln = to_cacheline_number(entry);
+       unsigned long flags;
+
+       /* ...mirror the insert case */
+       if (entry->direction == DMA_TO_DEVICE)
+               return;
+
+       spin_lock_irqsave(&radix_lock, flags);
+       /* since we are counting overlaps the final put of the
+        * cacheline will occur when the overlap count is 0.
+        * active_cacheline_dec_overlap() returns -1 in that case
+        */
+       if (active_cacheline_dec_overlap(cln) < 0)
+               radix_tree_delete(&dma_active_cacheline, cln);
+       spin_unlock_irqrestore(&radix_lock, flags);
+}
+
+/**
+ * debug_dma_assert_idle() - assert that a page is not undergoing dma
+ * @page: page to lookup in the dma_active_cacheline tree
+ *
+ * Place a call to this routine in cases where the cpu touching the page
+ * before the dma completes (page is dma_unmapped) will lead to data
+ * corruption.
+ */
+void debug_dma_assert_idle(struct page *page)
+{
+       static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
+       struct dma_debug_entry *entry = NULL;
+       void **results = (void **) &ents;
+       unsigned int nents, i;
+       unsigned long flags;
+       phys_addr_t cln;
+
+       if (dma_debug_disabled())
+               return;
+
+       if (!page)
+               return;
+
+       cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
+       spin_lock_irqsave(&radix_lock, flags);
+       nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
+                                      CACHELINES_PER_PAGE);
+       for (i = 0; i < nents; i++) {
+               phys_addr_t ent_cln = to_cacheline_number(ents[i]);
+
+               if (ent_cln == cln) {
+                       entry = ents[i];
+                       break;
+               } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
+                       break;
+       }
+       spin_unlock_irqrestore(&radix_lock, flags);
+
+       if (!entry)
+               return;
+
+       cln = to_cacheline_number(entry);
+       err_printk(entry->dev, entry,
+                  "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
+                  &cln);
+}
+
+/*
+ * Wrapper function for adding an entry to the hash.
+ * This function takes care of locking itself.
+ */
+static void add_dma_entry(struct dma_debug_entry *entry)
+{
+       struct hash_bucket *bucket;
+       unsigned long flags;
+       int rc;
+
+       bucket = get_hash_bucket(entry, &flags);
+       hash_bucket_add(bucket, entry);
+       put_hash_bucket(bucket, &flags);
+
+       rc = active_cacheline_insert(entry);
+       if (rc == -ENOMEM) {
+               pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
+               global_disable = true;
+       }
+
+       /* TODO: report -EEXIST errors here as overlapping mappings are
+        * not supported by the DMA API
+        */
+}
+
+static struct dma_debug_entry *__dma_entry_alloc(void)
+{
+       struct dma_debug_entry *entry;
+
+       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
+       list_del(&entry->list);
+       memset(entry, 0, sizeof(*entry));
+
+       num_free_entries -= 1;
+       if (num_free_entries < min_free_entries)
+               min_free_entries = num_free_entries;
+
+       return entry;
+}
+
+/* struct dma_entry allocator
+ *
+ * The next two functions implement the allocator for
+ * struct dma_debug_entries.
+ */
+static struct dma_debug_entry *dma_entry_alloc(void)
+{
+       struct dma_debug_entry *entry;
+       unsigned long flags;
+
+       spin_lock_irqsave(&free_entries_lock, flags);
+
+       if (list_empty(&free_entries)) {
+               global_disable = true;
+               spin_unlock_irqrestore(&free_entries_lock, flags);
+               pr_err("DMA-API: debugging out of memory - disabling\n");
+               return NULL;
+       }
+
+       entry = __dma_entry_alloc();
+
+       spin_unlock_irqrestore(&free_entries_lock, flags);
+
+#ifdef CONFIG_STACKTRACE
+       entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
+       entry->stacktrace.entries = entry->st_entries;
+       entry->stacktrace.skip = 2;
+       save_stack_trace(&entry->stacktrace);
+#endif
+
+       return entry;
+}
+
+static void dma_entry_free(struct dma_debug_entry *entry)
+{
+       unsigned long flags;
+
+       active_cacheline_remove(entry);
+
+       /*
+        * add to beginning of the list - this way the entries are
+        * more likely cache hot when they are reallocated.
+        */
+       spin_lock_irqsave(&free_entries_lock, flags);
+       list_add(&entry->list, &free_entries);
+       num_free_entries += 1;
+       spin_unlock_irqrestore(&free_entries_lock, flags);
+}
+
+int dma_debug_resize_entries(u32 num_entries)
+{
+       int i, delta, ret = 0;
+       unsigned long flags;
+       struct dma_debug_entry *entry;
+       LIST_HEAD(tmp);
+
+       spin_lock_irqsave(&free_entries_lock, flags);
+
+       if (nr_total_entries < num_entries) {
+               delta = num_entries - nr_total_entries;
+
+               spin_unlock_irqrestore(&free_entries_lock, flags);
+
+               for (i = 0; i < delta; i++) {
+                       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+                       if (!entry)
+                               break;
+
+                       list_add_tail(&entry->list, &tmp);
+               }
+
+               spin_lock_irqsave(&free_entries_lock, flags);
+
+               list_splice(&tmp, &free_entries);
+               nr_total_entries += i;
+               num_free_entries += i;
+       } else {
+               delta = nr_total_entries - num_entries;
+
+               for (i = 0; i < delta && !list_empty(&free_entries); i++) {
+                       entry = __dma_entry_alloc();
+                       kfree(entry);
+               }
+
+               nr_total_entries -= i;
+       }
+
+       if (nr_total_entries != num_entries)
+               ret = 1;
+
+       spin_unlock_irqrestore(&free_entries_lock, flags);
+
+       return ret;
+}
+
+/*
+ * DMA-API debugging init code
+ *
+ * The init code does two things:
+ *   1. Initialize core data structures
+ *   2. Preallocate a given number of dma_debug_entry structs
+ */
+
+static int prealloc_memory(u32 num_entries)
+{
+       struct dma_debug_entry *entry, *next_entry;
+       int i;
+
+       for (i = 0; i < num_entries; ++i) {
+               entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+               if (!entry)
+                       goto out_err;
+
+               list_add_tail(&entry->list, &free_entries);
+       }
+
+       num_free_entries = num_entries;
+       min_free_entries = num_entries;
+
+       pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
+
+       return 0;
+
+out_err:
+
+       list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
+               list_del(&entry->list);
+               kfree(entry);
+       }
+
+       return -ENOMEM;
+}
+
+static ssize_t filter_read(struct file *file, char __user *user_buf,
+                          size_t count, loff_t *ppos)
+{
+       char buf[NAME_MAX_LEN + 1];
+       unsigned long flags;
+       int len;
+
+       if (!current_driver_name[0])
+               return 0;
+
+       /*
+        * We can't copy to userspace directly because current_driver_name can
+        * only be read under the driver_name_lock with irqs disabled. So
+        * create a temporary copy first.
+        */
+       read_lock_irqsave(&driver_name_lock, flags);
+       len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
+       read_unlock_irqrestore(&driver_name_lock, flags);
+
+       return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t filter_write(struct file *file, const char __user *userbuf,
+                           size_t count, loff_t *ppos)
+{
+       char buf[NAME_MAX_LEN];
+       unsigned long flags;
+       size_t len;
+       int i;
+
+       /*
+        * We can't copy from userspace directly. Access to
+        * current_driver_name is protected with a write_lock with irqs
+        * disabled. Since copy_from_user can fault and may sleep we
+        * need to copy to temporary buffer first
+        */
+       len = min(count, (size_t)(NAME_MAX_LEN - 1));
+       if (copy_from_user(buf, userbuf, len))
+               return -EFAULT;
+
+       buf[len] = 0;
+
+       write_lock_irqsave(&driver_name_lock, flags);
+
+       /*
+        * Now handle the string we got from userspace very carefully.
+        * The rules are:
+        *         - only use the first token we got
+        *         - token delimiter is everything looking like a space
+        *           character (' ', '\n', '\t' ...)
+        *
+        */
+       if (!isalnum(buf[0])) {
+               /*
+                * If the first character userspace gave us is not
+                * alphanumerical then assume the filter should be
+                * switched off.
+                */
+               if (current_driver_name[0])
+                       pr_info("DMA-API: switching off dma-debug driver filter\n");
+               current_driver_name[0] = 0;
+               current_driver = NULL;
+               goto out_unlock;
+       }
+
+       /*
+        * Now parse out the first token and use it as the name for the
+        * driver to filter for.
+        */
+       for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
+               current_driver_name[i] = buf[i];
+               if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
+                       break;
+       }
+       current_driver_name[i] = 0;
+       current_driver = NULL;
+
+       pr_info("DMA-API: enable driver filter for driver [%s]\n",
+               current_driver_name);
+
+out_unlock:
+       write_unlock_irqrestore(&driver_name_lock, flags);
+
+       return count;
+}
+
+static const struct file_operations filter_fops = {
+       .read  = filter_read,
+       .write = filter_write,
+       .llseek = default_llseek,
+};
+
+static int dma_debug_fs_init(void)
+{
+       dma_debug_dent = debugfs_create_dir("dma-api", NULL);
+       if (!dma_debug_dent) {
+               pr_err("DMA-API: can not create debugfs directory\n");
+               return -ENOMEM;
+       }
+
+       global_disable_dent = debugfs_create_bool("disabled", 0444,
+                       dma_debug_dent,
+                       &global_disable);
+       if (!global_disable_dent)
+               goto out_err;
+
+       error_count_dent = debugfs_create_u32("error_count", 0444,
+                       dma_debug_dent, &error_count);
+       if (!error_count_dent)
+               goto out_err;
+
+       show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
+                       dma_debug_dent,
+                       &show_all_errors);
+       if (!show_all_errors_dent)
+               goto out_err;
+
+       show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
+                       dma_debug_dent,
+                       &show_num_errors);
+       if (!show_num_errors_dent)
+               goto out_err;
+
+       num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
+                       dma_debug_dent,
+                       &num_free_entries);
+       if (!num_free_entries_dent)
+               goto out_err;
+
+       min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
+                       dma_debug_dent,
+                       &min_free_entries);
+       if (!min_free_entries_dent)
+               goto out_err;
+
+       filter_dent = debugfs_create_file("driver_filter", 0644,
+                                         dma_debug_dent, NULL, &filter_fops);
+       if (!filter_dent)
+               goto out_err;
+
+       return 0;
+
+out_err:
+       debugfs_remove_recursive(dma_debug_dent);
+
+       return -ENOMEM;
+}
+
+static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
+{
+       struct dma_debug_entry *entry;
+       unsigned long flags;
+       int count = 0, i;
+
+       for (i = 0; i < HASH_SIZE; ++i) {
+               spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
+               list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
+                       if (entry->dev == dev) {
+                               count += 1;
+                               *out_entry = entry;
+                       }
+               }
+               spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
+       }
+
+       return count;
+}
+
+static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
+{
+       struct device *dev = data;
+       struct dma_debug_entry *uninitialized_var(entry);
+       int count;
+
+       if (dma_debug_disabled())
+               return 0;
+
+       switch (action) {
+       case BUS_NOTIFY_UNBOUND_DRIVER:
+               count = device_dma_allocations(dev, &entry);
+               if (count == 0)
+                       break;
+               err_printk(dev, entry, "DMA-API: device driver has pending "
+                               "DMA allocations while released from device "
+                               "[count=%d]\n"
+                               "One of leaked entries details: "
+                               "[device address=0x%016llx] [size=%llu bytes] "
+                               "[mapped with %s] [mapped as %s]\n",
+                       count, entry->dev_addr, entry->size,
+                       dir2name[entry->direction], type2name[entry->type]);
+               break;
+       default:
+               break;
+       }
+
+       return 0;
+}
+
+void dma_debug_add_bus(struct bus_type *bus)
+{
+       struct notifier_block *nb;
+
+       if (dma_debug_disabled())
+               return;
+
+       nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
+       if (nb == NULL) {
+               pr_err("dma_debug_add_bus: out of memory\n");
+               return;
+       }
+
+       nb->notifier_call = dma_debug_device_change;
+
+       bus_register_notifier(bus, nb);
+}
+
+static int dma_debug_init(void)
+{
+       int i;
+
+       /* Do not use dma_debug_initialized here, since we really want to be
+        * called to set dma_debug_initialized
+        */
+       if (global_disable)
+               return 0;
+
+       for (i = 0; i < HASH_SIZE; ++i) {
+               INIT_LIST_HEAD(&dma_entry_hash[i].list);
+               spin_lock_init(&dma_entry_hash[i].lock);
+       }
+
+       if (dma_debug_fs_init() != 0) {
+               pr_err("DMA-API: error creating debugfs entries - disabling\n");
+               global_disable = true;
+
+               return 0;
+       }
+
+       if (prealloc_memory(nr_prealloc_entries) != 0) {
+               pr_err("DMA-API: debugging out of memory error - disabled\n");
+               global_disable = true;
+
+               return 0;
+       }
+
+       nr_total_entries = num_free_entries;
+
+       dma_debug_initialized = true;
+
+       pr_info("DMA-API: debugging enabled by kernel config\n");
+       return 0;
+}
+core_initcall(dma_debug_init);
+
+static __init int dma_debug_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+
+       if (strncmp(str, "off", 3) == 0) {
+               pr_info("DMA-API: debugging disabled on kernel command line\n");
+               global_disable = true;
+       }
+
+       return 0;
+}
+
+static __init int dma_debug_entries_cmdline(char *str)
+{
+       if (!str)
+               return -EINVAL;
+       if (!get_option(&str, &nr_prealloc_entries))
+               nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
+       return 0;
+}
+
+__setup("dma_debug=", dma_debug_cmdline);
+__setup("dma_debug_entries=", dma_debug_entries_cmdline);
+
+static void check_unmap(struct dma_debug_entry *ref)
+{
+       struct dma_debug_entry *entry;
+       struct hash_bucket *bucket;
+       unsigned long flags;
+
+       bucket = get_hash_bucket(ref, &flags);
+       entry = bucket_find_exact(bucket, ref);
+
+       if (!entry) {
+               /* must drop lock before calling dma_mapping_error */
+               put_hash_bucket(bucket, &flags);
+
+               if (dma_mapping_error(ref->dev, ref->dev_addr)) {
+                       err_printk(ref->dev, NULL,
+                                  "DMA-API: device driver tries to free an "
+                                  "invalid DMA memory address\n");
+               } else {
+                       err_printk(ref->dev, NULL,
+                                  "DMA-API: device driver tries to free DMA "
+                                  "memory it has not allocated [device "
+                                  "address=0x%016llx] [size=%llu bytes]\n",
+                                  ref->dev_addr, ref->size);
+               }
+               return;
+       }
+
+       if (ref->size != entry->size) {
+               err_printk(ref->dev, entry, "DMA-API: device driver frees "
+                          "DMA memory with different size "
+                          "[device address=0x%016llx] [map size=%llu bytes] "
+                          "[unmap size=%llu bytes]\n",
+                          ref->dev_addr, entry->size, ref->size);
+       }
+
+       if (ref->type != entry->type) {
+               err_printk(ref->dev, entry, "DMA-API: device driver frees "
+                          "DMA memory with wrong function "
+                          "[device address=0x%016llx] [size=%llu bytes] "
+                          "[mapped as %s] [unmapped as %s]\n",
+                          ref->dev_addr, ref->size,
+                          type2name[entry->type], type2name[ref->type]);
+       } else if ((entry->type == dma_debug_coherent) &&
+                  (phys_addr(ref) != phys_addr(entry))) {
+               err_printk(ref->dev, entry, "DMA-API: device driver frees "
+                          "DMA memory with different CPU address "
+                          "[device address=0x%016llx] [size=%llu bytes] "
+                          "[cpu alloc address=0x%016llx] "
+                          "[cpu free address=0x%016llx]",
+                          ref->dev_addr, ref->size,
+                          phys_addr(entry),
+                          phys_addr(ref));
+       }
+
+       if (ref->sg_call_ents && ref->type == dma_debug_sg &&
+           ref->sg_call_ents != entry->sg_call_ents) {
+               err_printk(ref->dev, entry, "DMA-API: device driver frees "
+                          "DMA sg list with different entry count "
+                          "[map count=%d] [unmap count=%d]\n",
+                          entry->sg_call_ents, ref->sg_call_ents);
+       }
+
+       /*
+        * This may be no bug in reality - but most implementations of the
+        * DMA API don't handle this properly, so check for it here
+        */
+       if (ref->direction != entry->direction) {
+               err_printk(ref->dev, entry, "DMA-API: device driver frees "
+                          "DMA memory with different direction "
+                          "[device address=0x%016llx] [size=%llu bytes] "
+                          "[mapped with %s] [unmapped with %s]\n",
+                          ref->dev_addr, ref->size,
+                          dir2name[entry->direction],
+                          dir2name[ref->direction]);
+       }
+
+       /*
+        * Drivers should use dma_mapping_error() to check the returned
+        * addresses of dma_map_single() and dma_map_page().
+        * If not, print this warning message. See Documentation/DMA-API.txt.
+        */
+       if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+               err_printk(ref->dev, entry,
+                          "DMA-API: device driver failed to check map error"
+                          "[device address=0x%016llx] [size=%llu bytes] "
+                          "[mapped as %s]",
+                          ref->dev_addr, ref->size,
+                          type2name[entry->type]);
+       }
+
+       hash_bucket_del(entry);
+       dma_entry_free(entry);
+
+       put_hash_bucket(bucket, &flags);
+}
+
+static void check_for_stack(struct device *dev,
+                           struct page *page, size_t offset)
+{
+       void *addr;
+       struct vm_struct *stack_vm_area = task_stack_vm_area(current);
+
+       if (!stack_vm_area) {
+               /* Stack is direct-mapped. */
+               if (PageHighMem(page))
+                       return;
+               addr = page_address(page) + offset;
+               if (object_is_on_stack(addr))
+                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
+       } else {
+               /* Stack is vmalloced. */
+               int i;
+
+               for (i = 0; i < stack_vm_area->nr_pages; i++) {
+                       if (page != stack_vm_area->pages[i])
+                               continue;
+
+                       addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
+                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
+                       break;
+               }
+       }
+}
+
+static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
+{
+       unsigned long a1 = (unsigned long)addr;
+       unsigned long b1 = a1 + len;
+       unsigned long a2 = (unsigned long)start;
+       unsigned long b2 = (unsigned long)end;
+
+       return !(b1 <= a2 || a1 >= b2);
+}
+
+static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
+{
+       if (overlap(addr, len, _stext, _etext) ||
+           overlap(addr, len, __start_rodata, __end_rodata))
+               err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
+}
+
+static void check_sync(struct device *dev,
+                      struct dma_debug_entry *ref,
+                      bool to_cpu)
+{
+       struct dma_debug_entry *entry;
+       struct hash_bucket *bucket;
+       unsigned long flags;
+
+       bucket = get_hash_bucket(ref, &flags);
+
+       entry = bucket_find_contain(&bucket, ref, &flags);
+
+       if (!entry) {
+               err_printk(dev, NULL, "DMA-API: device driver tries "
+                               "to sync DMA memory it has not allocated "
+                               "[device address=0x%016llx] [size=%llu bytes]\n",
+                               (unsigned long long)ref->dev_addr, ref->size);
+               goto out;
+       }
+
+       if (ref->size > entry->size) {
+               err_printk(dev, entry, "DMA-API: device driver syncs"
+                               " DMA memory outside allocated range "
+                               "[device address=0x%016llx] "
+                               "[allocation size=%llu bytes] "
+                               "[sync offset+size=%llu]\n",
+                               entry->dev_addr, entry->size,
+                               ref->size);
+       }
+
+       if (entry->direction == DMA_BIDIRECTIONAL)
+               goto out;
+
+       if (ref->direction != entry->direction) {
+               err_printk(dev, entry, "DMA-API: device driver syncs "
+                               "DMA memory with different direction "
+                               "[device address=0x%016llx] [size=%llu bytes] "
+                               "[mapped with %s] [synced with %s]\n",
+                               (unsigned long long)ref->dev_addr, entry->size,
+                               dir2name[entry->direction],
+                               dir2name[ref->direction]);
+       }
+
+       if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
+                     !(ref->direction == DMA_TO_DEVICE))
+               err_printk(dev, entry, "DMA-API: device driver syncs "
+                               "device read-only DMA memory for cpu "
+                               "[device address=0x%016llx] [size=%llu bytes] "
+                               "[mapped with %s] [synced with %s]\n",
+                               (unsigned long long)ref->dev_addr, entry->size,
+                               dir2name[entry->direction],
+                               dir2name[ref->direction]);
+
+       if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
+                      !(ref->direction == DMA_FROM_DEVICE))
+               err_printk(dev, entry, "DMA-API: device driver syncs "
+                               "device write-only DMA memory to device "
+                               "[device address=0x%016llx] [size=%llu bytes] "
+                               "[mapped with %s] [synced with %s]\n",
+                               (unsigned long long)ref->dev_addr, entry->size,
+                               dir2name[entry->direction],
+                               dir2name[ref->direction]);
+
+       if (ref->sg_call_ents && ref->type == dma_debug_sg &&
+           ref->sg_call_ents != entry->sg_call_ents) {
+               err_printk(ref->dev, entry, "DMA-API: device driver syncs "
+                          "DMA sg list with different entry count "
+                          "[map count=%d] [sync count=%d]\n",
+                          entry->sg_call_ents, ref->sg_call_ents);
+       }
+
+out:
+       put_hash_bucket(bucket, &flags);
+}
+
+static void check_sg_segment(struct device *dev, struct scatterlist *sg)
+{
+#ifdef CONFIG_DMA_API_DEBUG_SG
+       unsigned int max_seg = dma_get_max_seg_size(dev);
+       u64 start, end, boundary = dma_get_seg_boundary(dev);
+
+       /*
+        * Either the driver forgot to set dma_parms appropriately, or
+        * whoever generated the list forgot to check them.
+        */
+       if (sg->length > max_seg)
+               err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
+                          sg->length, max_seg);
+       /*
+        * In some cases this could potentially be the DMA API
+        * implementation's fault, but it would usually imply that
+        * the scatterlist was built inappropriately to begin with.
+        */
+       start = sg_dma_address(sg);
+       end = start + sg_dma_len(sg) - 1;
+       if ((start ^ end) & ~boundary)
+               err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
+                          start, end, boundary);
+#endif
+}
+
+void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
+                       size_t size, int direction, dma_addr_t dma_addr,
+                       bool map_single)
+{
+       struct dma_debug_entry *entry;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       if (dma_mapping_error(dev, dma_addr))
+               return;
+
+       entry = dma_entry_alloc();
+       if (!entry)
+               return;
+
+       entry->dev       = dev;
+       entry->type      = dma_debug_page;
+       entry->pfn       = page_to_pfn(page);
+       entry->offset    = offset,
+       entry->dev_addr  = dma_addr;
+       entry->size      = size;
+       entry->direction = direction;
+       entry->map_err_type = MAP_ERR_NOT_CHECKED;
+
+       if (map_single)
+               entry->type = dma_debug_single;
+
+       check_for_stack(dev, page, offset);
+
+       if (!PageHighMem(page)) {
+               void *addr = page_address(page) + offset;
+
+               check_for_illegal_area(dev, addr, size);
+       }
+
+       add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_map_page);
+
+void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       struct dma_debug_entry ref;
+       struct dma_debug_entry *entry;
+       struct hash_bucket *bucket;
+       unsigned long flags;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       ref.dev = dev;
+       ref.dev_addr = dma_addr;
+       bucket = get_hash_bucket(&ref, &flags);
+
+       list_for_each_entry(entry, &bucket->list, list) {
+               if (!exact_match(&ref, entry))
+                       continue;
+
+               /*
+                * The same physical address can be mapped multiple
+                * times. Without a hardware IOMMU this results in the
+                * same device addresses being put into the dma-debug
+                * hash multiple times too. This can result in false
+                * positives being reported. Therefore we implement a
+                * best-fit algorithm here which updates the first entry
+                * from the hash which fits the reference value and is
+                * not currently listed as being checked.
+                */
+               if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+                       entry->map_err_type = MAP_ERR_CHECKED;
+                       break;
+               }
+       }
+
+       put_hash_bucket(bucket, &flags);
+}
+EXPORT_SYMBOL(debug_dma_mapping_error);
+
+void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+                         size_t size, int direction, bool map_single)
+{
+       struct dma_debug_entry ref = {
+               .type           = dma_debug_page,
+               .dev            = dev,
+               .dev_addr       = addr,
+               .size           = size,
+               .direction      = direction,
+       };
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       if (map_single)
+               ref.type = dma_debug_single;
+
+       check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_unmap_page);
+
+void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+                     int nents, int mapped_ents, int direction)
+{
+       struct dma_debug_entry *entry;
+       struct scatterlist *s;
+       int i;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       for_each_sg(sg, s, mapped_ents, i) {
+               entry = dma_entry_alloc();
+               if (!entry)
+                       return;
+
+               entry->type           = dma_debug_sg;
+               entry->dev            = dev;
+               entry->pfn            = page_to_pfn(sg_page(s));
+               entry->offset         = s->offset,
+               entry->size           = sg_dma_len(s);
+               entry->dev_addr       = sg_dma_address(s);
+               entry->direction      = direction;
+               entry->sg_call_ents   = nents;
+               entry->sg_mapped_ents = mapped_ents;
+
+               check_for_stack(dev, sg_page(s), s->offset);
+
+               if (!PageHighMem(sg_page(s))) {
+                       check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
+               }
+
+               check_sg_segment(dev, s);
+
+               add_dma_entry(entry);
+       }
+}
+EXPORT_SYMBOL(debug_dma_map_sg);
+
+static int get_nr_mapped_entries(struct device *dev,
+                                struct dma_debug_entry *ref)
+{
+       struct dma_debug_entry *entry;
+       struct hash_bucket *bucket;
+       unsigned long flags;
+       int mapped_ents;
+
+       bucket       = get_hash_bucket(ref, &flags);
+       entry        = bucket_find_exact(bucket, ref);
+       mapped_ents  = 0;
+
+       if (entry)
+               mapped_ents = entry->sg_mapped_ents;
+       put_hash_bucket(bucket, &flags);
+
+       return mapped_ents;
+}
+
+void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+                       int nelems, int dir)
+{
+       struct scatterlist *s;
+       int mapped_ents = 0, i;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       for_each_sg(sglist, s, nelems, i) {
+
+               struct dma_debug_entry ref = {
+                       .type           = dma_debug_sg,
+                       .dev            = dev,
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
+                       .dev_addr       = sg_dma_address(s),
+                       .size           = sg_dma_len(s),
+                       .direction      = dir,
+                       .sg_call_ents   = nelems,
+               };
+
+               if (mapped_ents && i >= mapped_ents)
+                       break;
+
+               if (!i)
+                       mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+               check_unmap(&ref);
+       }
+}
+EXPORT_SYMBOL(debug_dma_unmap_sg);
+
+void debug_dma_alloc_coherent(struct device *dev, size_t size,
+                             dma_addr_t dma_addr, void *virt)
+{
+       struct dma_debug_entry *entry;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       if (unlikely(virt == NULL))
+               return;
+
+       /* handle vmalloc and linear addresses */
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
+               return;
+
+       entry = dma_entry_alloc();
+       if (!entry)
+               return;
+
+       entry->type      = dma_debug_coherent;
+       entry->dev       = dev;
+       entry->offset    = offset_in_page(virt);
+       entry->size      = size;
+       entry->dev_addr  = dma_addr;
+       entry->direction = DMA_BIDIRECTIONAL;
+
+       if (is_vmalloc_addr(virt))
+               entry->pfn = vmalloc_to_pfn(virt);
+       else
+               entry->pfn = page_to_pfn(virt_to_page(virt));
+
+       add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_alloc_coherent);
+
+void debug_dma_free_coherent(struct device *dev, size_t size,
+                        void *virt, dma_addr_t addr)
+{
+       struct dma_debug_entry ref = {
+               .type           = dma_debug_coherent,
+               .dev            = dev,
+               .offset         = offset_in_page(virt),
+               .dev_addr       = addr,
+               .size           = size,
+               .direction      = DMA_BIDIRECTIONAL,
+       };
+
+       /* handle vmalloc and linear addresses */
+       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
+               return;
+
+       if (is_vmalloc_addr(virt))
+               ref.pfn = vmalloc_to_pfn(virt);
+       else
+               ref.pfn = page_to_pfn(virt_to_page(virt));
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_free_coherent);
+
+void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
+                           int direction, dma_addr_t dma_addr)
+{
+       struct dma_debug_entry *entry;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       entry = dma_entry_alloc();
+       if (!entry)
+               return;
+
+       entry->type             = dma_debug_resource;
+       entry->dev              = dev;
+       entry->pfn              = PHYS_PFN(addr);
+       entry->offset           = offset_in_page(addr);
+       entry->size             = size;
+       entry->dev_addr         = dma_addr;
+       entry->direction        = direction;
+       entry->map_err_type     = MAP_ERR_NOT_CHECKED;
+
+       add_dma_entry(entry);
+}
+EXPORT_SYMBOL(debug_dma_map_resource);
+
+void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
+                             size_t size, int direction)
+{
+       struct dma_debug_entry ref = {
+               .type           = dma_debug_resource,
+               .dev            = dev,
+               .dev_addr       = dma_addr,
+               .size           = size,
+               .direction      = direction,
+       };
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       check_unmap(&ref);
+}
+EXPORT_SYMBOL(debug_dma_unmap_resource);
+
+void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
+                                  size_t size, int direction)
+{
+       struct dma_debug_entry ref;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       ref.type         = dma_debug_single;
+       ref.dev          = dev;
+       ref.dev_addr     = dma_handle;
+       ref.size         = size;
+       ref.direction    = direction;
+       ref.sg_call_ents = 0;
+
+       check_sync(dev, &ref, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
+
+void debug_dma_sync_single_for_device(struct device *dev,
+                                     dma_addr_t dma_handle, size_t size,
+                                     int direction)
+{
+       struct dma_debug_entry ref;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       ref.type         = dma_debug_single;
+       ref.dev          = dev;
+       ref.dev_addr     = dma_handle;
+       ref.size         = size;
+       ref.direction    = direction;
+       ref.sg_call_ents = 0;
+
+       check_sync(dev, &ref, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_for_device);
+
+void debug_dma_sync_single_range_for_cpu(struct device *dev,
+                                        dma_addr_t dma_handle,
+                                        unsigned long offset, size_t size,
+                                        int direction)
+{
+       struct dma_debug_entry ref;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       ref.type         = dma_debug_single;
+       ref.dev          = dev;
+       ref.dev_addr     = dma_handle;
+       ref.size         = offset + size;
+       ref.direction    = direction;
+       ref.sg_call_ents = 0;
+
+       check_sync(dev, &ref, true);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
+
+void debug_dma_sync_single_range_for_device(struct device *dev,
+                                           dma_addr_t dma_handle,
+                                           unsigned long offset,
+                                           size_t size, int direction)
+{
+       struct dma_debug_entry ref;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       ref.type         = dma_debug_single;
+       ref.dev          = dev;
+       ref.dev_addr     = dma_handle;
+       ref.size         = offset + size;
+       ref.direction    = direction;
+       ref.sg_call_ents = 0;
+
+       check_sync(dev, &ref, false);
+}
+EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
+
+void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+                              int nelems, int direction)
+{
+       struct scatterlist *s;
+       int mapped_ents = 0, i;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       for_each_sg(sg, s, nelems, i) {
+
+               struct dma_debug_entry ref = {
+                       .type           = dma_debug_sg,
+                       .dev            = dev,
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
+                       .dev_addr       = sg_dma_address(s),
+                       .size           = sg_dma_len(s),
+                       .direction      = direction,
+                       .sg_call_ents   = nelems,
+               };
+
+               if (!i)
+                       mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+               if (i >= mapped_ents)
+                       break;
+
+               check_sync(dev, &ref, true);
+       }
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
+
+void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+                                 int nelems, int direction)
+{
+       struct scatterlist *s;
+       int mapped_ents = 0, i;
+
+       if (unlikely(dma_debug_disabled()))
+               return;
+
+       for_each_sg(sg, s, nelems, i) {
+
+               struct dma_debug_entry ref = {
+                       .type           = dma_debug_sg,
+                       .dev            = dev,
+                       .pfn            = page_to_pfn(sg_page(s)),
+                       .offset         = s->offset,
+                       .dev_addr       = sg_dma_address(s),
+                       .size           = sg_dma_len(s),
+                       .direction      = direction,
+                       .sg_call_ents   = nelems,
+               };
+               if (!i)
+                       mapped_ents = get_nr_mapped_entries(dev, &ref);
+
+               if (i >= mapped_ents)
+                       break;
+
+               check_sync(dev, &ref, false);
+       }
+}
+EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
+
+static int __init dma_debug_driver_setup(char *str)
+{
+       int i;
+
+       for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
+               current_driver_name[i] = *str;
+               if (*str == 0)
+                       break;
+       }
+
+       if (current_driver_name[0])
+               pr_info("DMA-API: enable driver filter for driver [%s]\n",
+                       current_driver_name);
+
+
+       return 1;
+}
+__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
new file mode 100644 (file)
index 0000000..8be8106
--- /dev/null
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMA operations that map physical memory directly without using an IOMMU or
+ * flushing caches.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/scatterlist.h>
+#include <linux/dma-contiguous.h>
+#include <linux/pfn.h>
+#include <linux/set_memory.h>
+
+#define DIRECT_MAPPING_ERROR           0
+
+/*
+ * Most architectures use ZONE_DMA for the first 16 Megabytes, but
+ * some use it for entirely different regions:
+ */
+#ifndef ARCH_ZONE_DMA_BITS
+#define ARCH_ZONE_DMA_BITS 24
+#endif
+
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+       return sev_active();
+}
+
+static bool
+check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
+               const char *caller)
+{
+       if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
+               if (!dev->dma_mask) {
+                       dev_err(dev,
+                               "%s: call on device without dma_mask\n",
+                               caller);
+                       return false;
+               }
+
+               if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
+                       dev_err(dev,
+                               "%s: overflow %pad+%zu of device mask %llx\n",
+                               caller, &dma_addr, size, *dev->dma_mask);
+               }
+               return false;
+       }
+       return true;
+}
+
+static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
+{
+       dma_addr_t addr = force_dma_unencrypted() ?
+               __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+       return addr + size - 1 <= dev->coherent_dma_mask;
+}
+
+void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               gfp_t gfp, unsigned long attrs)
+{
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       int page_order = get_order(size);
+       struct page *page = NULL;
+       void *ret;
+
+       /* we always manually zero the memory once we are done: */
+       gfp &= ~__GFP_ZERO;
+
+       /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
+       if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+               gfp |= GFP_DMA;
+       if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
+               gfp |= GFP_DMA32;
+
+again:
+       /* CMA can be used only in the context which permits sleeping */
+       if (gfpflags_allow_blocking(gfp)) {
+               page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
+               if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+                       dma_release_from_contiguous(dev, page, count);
+                       page = NULL;
+               }
+       }
+       if (!page)
+               page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
+
+       if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+               __free_pages(page, page_order);
+               page = NULL;
+
+               if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+                   dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
+                   !(gfp & (GFP_DMA32 | GFP_DMA))) {
+                       gfp |= GFP_DMA32;
+                       goto again;
+               }
+
+               if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+                   dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
+                   !(gfp & GFP_DMA)) {
+                       gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
+                       goto again;
+               }
+       }
+
+       if (!page)
+               return NULL;
+       ret = page_address(page);
+       if (force_dma_unencrypted()) {
+               set_memory_decrypted((unsigned long)ret, 1 << page_order);
+               *dma_handle = __phys_to_dma(dev, page_to_phys(page));
+       } else {
+               *dma_handle = phys_to_dma(dev, page_to_phys(page));
+       }
+       memset(ret, 0, size);
+       return ret;
+}
+
+/*
+ * NOTE: this function must never look at the dma_addr argument, because we want
+ * to be able to use it as a helper for iommu implementations as well.
+ */
+void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
+               dma_addr_t dma_addr, unsigned long attrs)
+{
+       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned int page_order = get_order(size);
+
+       if (force_dma_unencrypted())
+               set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
+       if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
+               free_pages((unsigned long)cpu_addr, page_order);
+}
+
+dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
+               unsigned long offset, size_t size, enum dma_data_direction dir,
+               unsigned long attrs)
+{
+       dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
+
+       if (!check_addr(dev, dma_addr, size, __func__))
+               return DIRECT_MAPPING_ERROR;
+       return dma_addr;
+}
+
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+               enum dma_data_direction dir, unsigned long attrs)
+{
+       int i;
+       struct scatterlist *sg;
+
+       for_each_sg(sgl, sg, nents, i) {
+               BUG_ON(!sg_page(sg));
+
+               sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
+               if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
+                       return 0;
+               sg_dma_len(sg) = sg->length;
+       }
+
+       return nents;
+}
+
+int dma_direct_supported(struct device *dev, u64 mask)
+{
+#ifdef CONFIG_ZONE_DMA
+       if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
+               return 0;
+#else
+       /*
+        * Because 32-bit DMA masks are so common we expect every architecture
+        * to be able to satisfy them - either by not supporting more physical
+        * memory, or by providing a ZONE_DMA32.  If neither is the case, the
+        * architecture needs to use an IOMMU instead of the direct mapping.
+        */
+       if (mask < DMA_BIT_MASK(32))
+               return 0;
+#endif
+       /*
+        * Various PCI/PCIe bridges have broken support for > 32bit DMA even
+        * if the device itself might support it.
+        */
+       if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
+               return 0;
+       return 1;
+}
+
+int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
+{
+       return dma_addr == DIRECT_MAPPING_ERROR;
+}
+
+const struct dma_map_ops dma_direct_ops = {
+       .alloc                  = dma_direct_alloc,
+       .free                   = dma_direct_free,
+       .map_page               = dma_direct_map_page,
+       .map_sg                 = dma_direct_map_sg,
+       .dma_supported          = dma_direct_supported,
+       .mapping_error          = dma_direct_mapping_error,
+};
+EXPORT_SYMBOL(dma_direct_ops);
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
new file mode 100644 (file)
index 0000000..d2a92dd
--- /dev/null
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * arch-independent dma-mapping routines
+ *
+ * Copyright (c) 2006  SUSE Linux Products GmbH
+ * Copyright (c) 2006  Tejun Heo <teheo@suse.de>
+ */
+
+#include <linux/acpi.h>
+#include <linux/dma-mapping.h>
+#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+
+/*
+ * Managed DMA API
+ */
+struct dma_devres {
+       size_t          size;
+       void            *vaddr;
+       dma_addr_t      dma_handle;
+       unsigned long   attrs;
+};
+
+static void dmam_release(struct device *dev, void *res)
+{
+       struct dma_devres *this = res;
+
+       dma_free_attrs(dev, this->size, this->vaddr, this->dma_handle,
+                       this->attrs);
+}
+
+static int dmam_match(struct device *dev, void *res, void *match_data)
+{
+       struct dma_devres *this = res, *match = match_data;
+
+       if (this->vaddr == match->vaddr) {
+               WARN_ON(this->size != match->size ||
+                       this->dma_handle != match->dma_handle);
+               return 1;
+       }
+       return 0;
+}
+
+/**
+ * dmam_alloc_coherent - Managed dma_alloc_coherent()
+ * @dev: Device to allocate coherent memory for
+ * @size: Size of allocation
+ * @dma_handle: Out argument for allocated DMA handle
+ * @gfp: Allocation flags
+ *
+ * Managed dma_alloc_coherent().  Memory allocated using this function
+ * will be automatically released on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void *dmam_alloc_coherent(struct device *dev, size_t size,
+                          dma_addr_t *dma_handle, gfp_t gfp)
+{
+       struct dma_devres *dr;
+       void *vaddr;
+
+       dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
+       if (!dr)
+               return NULL;
+
+       vaddr = dma_alloc_coherent(dev, size, dma_handle, gfp);
+       if (!vaddr) {
+               devres_free(dr);
+               return NULL;
+       }
+
+       dr->vaddr = vaddr;
+       dr->dma_handle = *dma_handle;
+       dr->size = size;
+
+       devres_add(dev, dr);
+
+       return vaddr;
+}
+EXPORT_SYMBOL(dmam_alloc_coherent);
+
+/**
+ * dmam_free_coherent - Managed dma_free_coherent()
+ * @dev: Device to free coherent memory for
+ * @size: Size of allocation
+ * @vaddr: Virtual address of the memory to free
+ * @dma_handle: DMA handle of the memory to free
+ *
+ * Managed dma_free_coherent().
+ */
+void dmam_free_coherent(struct device *dev, size_t size, void *vaddr,
+                       dma_addr_t dma_handle)
+{
+       struct dma_devres match_data = { size, vaddr, dma_handle };
+
+       dma_free_coherent(dev, size, vaddr, dma_handle);
+       WARN_ON(devres_destroy(dev, dmam_release, dmam_match, &match_data));
+}
+EXPORT_SYMBOL(dmam_free_coherent);
+
+/**
+ * dmam_alloc_attrs - Managed dma_alloc_attrs()
+ * @dev: Device to allocate non_coherent memory for
+ * @size: Size of allocation
+ * @dma_handle: Out argument for allocated DMA handle
+ * @gfp: Allocation flags
+ * @attrs: Flags in the DMA_ATTR_* namespace.
+ *
+ * Managed dma_alloc_attrs().  Memory allocated using this function will be
+ * automatically released on driver detach.
+ *
+ * RETURNS:
+ * Pointer to allocated memory on success, NULL on failure.
+ */
+void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               gfp_t gfp, unsigned long attrs)
+{
+       struct dma_devres *dr;
+       void *vaddr;
+
+       dr = devres_alloc(dmam_release, sizeof(*dr), gfp);
+       if (!dr)
+               return NULL;
+
+       vaddr = dma_alloc_attrs(dev, size, dma_handle, gfp, attrs);
+       if (!vaddr) {
+               devres_free(dr);
+               return NULL;
+       }
+
+       dr->vaddr = vaddr;
+       dr->dma_handle = *dma_handle;
+       dr->size = size;
+       dr->attrs = attrs;
+
+       devres_add(dev, dr);
+
+       return vaddr;
+}
+EXPORT_SYMBOL(dmam_alloc_attrs);
+
+#ifdef CONFIG_HAVE_GENERIC_DMA_COHERENT
+
+static void dmam_coherent_decl_release(struct device *dev, void *res)
+{
+       dma_release_declared_memory(dev);
+}
+
+/**
+ * dmam_declare_coherent_memory - Managed dma_declare_coherent_memory()
+ * @dev: Device to declare coherent memory for
+ * @phys_addr: Physical address of coherent memory to be declared
+ * @device_addr: Device address of coherent memory to be declared
+ * @size: Size of coherent memory to be declared
+ * @flags: Flags
+ *
+ * Managed dma_declare_coherent_memory().
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+int dmam_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
+                                dma_addr_t device_addr, size_t size, int flags)
+{
+       void *res;
+       int rc;
+
+       res = devres_alloc(dmam_coherent_decl_release, 0, GFP_KERNEL);
+       if (!res)
+               return -ENOMEM;
+
+       rc = dma_declare_coherent_memory(dev, phys_addr, device_addr, size,
+                                        flags);
+       if (!rc)
+               devres_add(dev, res);
+       else
+               devres_free(res);
+
+       return rc;
+}
+EXPORT_SYMBOL(dmam_declare_coherent_memory);
+
+/**
+ * dmam_release_declared_memory - Managed dma_release_declared_memory().
+ * @dev: Device to release declared coherent memory for
+ *
+ * Managed dmam_release_declared_memory().
+ */
+void dmam_release_declared_memory(struct device *dev)
+{
+       WARN_ON(devres_destroy(dev, dmam_coherent_decl_release, NULL, NULL));
+}
+EXPORT_SYMBOL(dmam_release_declared_memory);
+
+#endif
+
+/*
+ * Create scatter-list for the already allocated DMA buffer.
+ */
+int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+                void *cpu_addr, dma_addr_t handle, size_t size)
+{
+       struct page *page = virt_to_page(cpu_addr);
+       int ret;
+
+       ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+       if (unlikely(ret))
+               return ret;
+
+       sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+       return 0;
+}
+EXPORT_SYMBOL(dma_common_get_sgtable);
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+                   void *cpu_addr, dma_addr_t dma_addr, size_t size)
+{
+       int ret = -ENXIO;
+#ifndef CONFIG_ARCH_NO_COHERENT_DMA_MMAP
+       unsigned long user_count = vma_pages(vma);
+       unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+       unsigned long off = vma->vm_pgoff;
+
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+               return ret;
+
+       if (off < count && user_count <= (count - off))
+               ret = remap_pfn_range(vma, vma->vm_start,
+                                     page_to_pfn(virt_to_page(cpu_addr)) + off,
+                                     user_count << PAGE_SHIFT,
+                                     vma->vm_page_prot);
+#endif /* !CONFIG_ARCH_NO_COHERENT_DMA_MMAP */
+
+       return ret;
+}
+EXPORT_SYMBOL(dma_common_mmap);
+
+#ifdef CONFIG_MMU
+static struct vm_struct *__dma_common_pages_remap(struct page **pages,
+                       size_t size, unsigned long vm_flags, pgprot_t prot,
+                       const void *caller)
+{
+       struct vm_struct *area;
+
+       area = get_vm_area_caller(size, vm_flags, caller);
+       if (!area)
+               return NULL;
+
+       if (map_vm_area(area, prot, pages)) {
+               vunmap(area->addr);
+               return NULL;
+       }
+
+       return area;
+}
+
+/*
+ * remaps an array of PAGE_SIZE pages into another vm_area
+ * Cannot be used in non-sleeping contexts
+ */
+void *dma_common_pages_remap(struct page **pages, size_t size,
+                       unsigned long vm_flags, pgprot_t prot,
+                       const void *caller)
+{
+       struct vm_struct *area;
+
+       area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+       if (!area)
+               return NULL;
+
+       area->pages = pages;
+
+       return area->addr;
+}
+
+/*
+ * remaps an allocated contiguous region into another vm_area.
+ * Cannot be used in non-sleeping contexts
+ */
+
+void *dma_common_contiguous_remap(struct page *page, size_t size,
+                       unsigned long vm_flags,
+                       pgprot_t prot, const void *caller)
+{
+       int i;
+       struct page **pages;
+       struct vm_struct *area;
+
+       pages = kmalloc(sizeof(struct page *) << get_order(size), GFP_KERNEL);
+       if (!pages)
+               return NULL;
+
+       for (i = 0; i < (size >> PAGE_SHIFT); i++)
+               pages[i] = nth_page(page, i);
+
+       area = __dma_common_pages_remap(pages, size, vm_flags, prot, caller);
+
+       kfree(pages);
+
+       if (!area)
+               return NULL;
+       return area->addr;
+}
+
+/*
+ * unmaps a range previously mapped by dma_common_*_remap
+ */
+void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags)
+{
+       struct vm_struct *area = find_vm_area(cpu_addr);
+
+       if (!area || (area->flags & vm_flags) != vm_flags) {
+               WARN(1, "trying to free invalid coherent area: %p\n", cpu_addr);
+               return;
+       }
+
+       unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
+       vunmap(cpu_addr);
+}
+#endif
+
+/*
+ * enables DMA API use for a device
+ */
+int dma_configure(struct device *dev)
+{
+       if (dev->bus->dma_configure)
+               return dev->bus->dma_configure(dev);
+       return 0;
+}
+
+void dma_deconfigure(struct device *dev)
+{
+       of_dma_deconfigure(dev);
+       acpi_dma_deconfigure(dev);
+}
diff --git a/kernel/dma/noncoherent.c b/kernel/dma/noncoherent.c
new file mode 100644 (file)
index 0000000..79e9a75
--- /dev/null
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without providing cache
+ * coherence.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/scatterlist.h>
+
+static void dma_noncoherent_sync_single_for_device(struct device *dev,
+               dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+       arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_device(struct device *dev,
+               struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nents, i)
+               arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
+}
+
+static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
+               unsigned long offset, size_t size, enum dma_data_direction dir,
+               unsigned long attrs)
+{
+       dma_addr_t addr;
+
+       addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+       if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+               arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
+                               size, dir);
+       return addr;
+}
+
+static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
+       if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+               dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
+       return nents;
+}
+
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
+               dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+       arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
+}
+
+static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
+               struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nents, i)
+               arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
+}
+
+static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
+               size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+               dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
+}
+
+static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
+               int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+               dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
+}
+#endif
+
+const struct dma_map_ops dma_noncoherent_ops = {
+       .alloc                  = arch_dma_alloc,
+       .free                   = arch_dma_free,
+       .mmap                   = arch_dma_mmap,
+       .sync_single_for_device = dma_noncoherent_sync_single_for_device,
+       .sync_sg_for_device     = dma_noncoherent_sync_sg_for_device,
+       .map_page               = dma_noncoherent_map_page,
+       .map_sg                 = dma_noncoherent_map_sg,
+#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
+       .sync_single_for_cpu    = dma_noncoherent_sync_single_for_cpu,
+       .sync_sg_for_cpu        = dma_noncoherent_sync_sg_for_cpu,
+       .unmap_page             = dma_noncoherent_unmap_page,
+       .unmap_sg               = dma_noncoherent_unmap_sg,
+#endif
+       .dma_supported          = dma_direct_supported,
+       .mapping_error          = dma_direct_mapping_error,
+       .cache_sync             = arch_dma_cache_sync,
+};
+EXPORT_SYMBOL(dma_noncoherent_ops);
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
new file mode 100644 (file)
index 0000000..04b68d9
--- /dev/null
@@ -0,0 +1,1087 @@
+/*
+ * Dynamic DMA mapping support.
+ *
+ * This implementation is a fallback for platforms that do not support
+ * I/O TLBs (aka DMA address translation hardware).
+ * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
+ * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
+ * Copyright (C) 2000, 2003 Hewlett-Packard Co
+ *     David Mosberger-Tang <davidm@hpl.hp.com>
+ *
+ * 03/05/07 davidm     Switch from PCI-DMA to generic device DMA API.
+ * 00/12/13 davidm     Rename to swiotlb.c and add mark_clean() to avoid
+ *                     unnecessary i-cache flushing.
+ * 04/07/.. ak         Better overflow handling. Assorted fixes.
+ * 05/09/10 linville   Add support for syncing ranges, support syncing for
+ *                     DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
+ * 08/12/11 beckyb     Add highmem support
+ */
+
+#include <linux/cache.h>
+#include <linux/dma-direct.h>
+#include <linux/mm.h>
+#include <linux/export.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/swiotlb.h>
+#include <linux/pfn.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/highmem.h>
+#include <linux/gfp.h>
+#include <linux/scatterlist.h>
+#include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
+
+#include <asm/io.h>
+#include <asm/dma.h>
+
+#include <linux/init.h>
+#include <linux/bootmem.h>
+#include <linux/iommu-helper.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/swiotlb.h>
+
+#define OFFSET(val,align) ((unsigned long)     \
+                          ( (val) & ( (align) - 1)))
+
+#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
+
+/*
+ * Minimum IO TLB size to bother booting with.  Systems with mainly
+ * 64bit capable cards will only lightly use the swiotlb.  If we can't
+ * allocate a contiguous 1MB, we're probably in trouble anyway.
+ */
+#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
+
+enum swiotlb_force swiotlb_force;
+
+/*
+ * Used to do a quick range check in swiotlb_tbl_unmap_single and
+ * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
+ * API.
+ */
+static phys_addr_t io_tlb_start, io_tlb_end;
+
+/*
+ * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
+ * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
+ */
+static unsigned long io_tlb_nslabs;
+
+/*
+ * When the IOMMU overflows we return a fallback buffer. This sets the size.
+ */
+static unsigned long io_tlb_overflow = 32*1024;
+
+static phys_addr_t io_tlb_overflow_buffer;
+
+/*
+ * This is a free list describing the number of free entries available from
+ * each index
+ */
+static unsigned int *io_tlb_list;
+static unsigned int io_tlb_index;
+
+/*
+ * Max segment that we can provide which (if pages are contingous) will
+ * not be bounced (unless SWIOTLB_FORCE is set).
+ */
+unsigned int max_segment;
+
+/*
+ * We need to save away the original address corresponding to a mapped entry
+ * for the sync operations.
+ */
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
+static phys_addr_t *io_tlb_orig_addr;
+
+/*
+ * Protect the above data structures in the map and unmap calls
+ */
+static DEFINE_SPINLOCK(io_tlb_lock);
+
+static int late_alloc;
+
+static int __init
+setup_io_tlb_npages(char *str)
+{
+       if (isdigit(*str)) {
+               io_tlb_nslabs = simple_strtoul(str, &str, 0);
+               /* avoid tail segment of size < IO_TLB_SEGSIZE */
+               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+       }
+       if (*str == ',')
+               ++str;
+       if (!strcmp(str, "force")) {
+               swiotlb_force = SWIOTLB_FORCE;
+       } else if (!strcmp(str, "noforce")) {
+               swiotlb_force = SWIOTLB_NO_FORCE;
+               io_tlb_nslabs = 1;
+       }
+
+       return 0;
+}
+early_param("swiotlb", setup_io_tlb_npages);
+/* make io_tlb_overflow tunable too? */
+
+unsigned long swiotlb_nr_tbl(void)
+{
+       return io_tlb_nslabs;
+}
+EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
+
+unsigned int swiotlb_max_segment(void)
+{
+       return max_segment;
+}
+EXPORT_SYMBOL_GPL(swiotlb_max_segment);
+
+void swiotlb_set_max_segment(unsigned int val)
+{
+       if (swiotlb_force == SWIOTLB_FORCE)
+               max_segment = 1;
+       else
+               max_segment = rounddown(val, PAGE_SIZE);
+}
+
+/* default to 64MB */
+#define IO_TLB_DEFAULT_SIZE (64UL<<20)
+unsigned long swiotlb_size_or_default(void)
+{
+       unsigned long size;
+
+       size = io_tlb_nslabs << IO_TLB_SHIFT;
+
+       return size ? size : (IO_TLB_DEFAULT_SIZE);
+}
+
+static bool no_iotlb_memory;
+
+void swiotlb_print_info(void)
+{
+       unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+       unsigned char *vstart, *vend;
+
+       if (no_iotlb_memory) {
+               pr_warn("software IO TLB: No low mem\n");
+               return;
+       }
+
+       vstart = phys_to_virt(io_tlb_start);
+       vend = phys_to_virt(io_tlb_end);
+
+       printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
+              (unsigned long long)io_tlb_start,
+              (unsigned long long)io_tlb_end,
+              bytes >> 20, vstart, vend - 1);
+}
+
+/*
+ * Early SWIOTLB allocation may be too early to allow an architecture to
+ * perform the desired operations.  This function allows the architecture to
+ * call SWIOTLB when the operations are possible.  It needs to be called
+ * before the SWIOTLB memory is used.
+ */
+void __init swiotlb_update_mem_attributes(void)
+{
+       void *vaddr;
+       unsigned long bytes;
+
+       if (no_iotlb_memory || late_alloc)
+               return;
+
+       vaddr = phys_to_virt(io_tlb_start);
+       bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+       memset(vaddr, 0, bytes);
+
+       vaddr = phys_to_virt(io_tlb_overflow_buffer);
+       bytes = PAGE_ALIGN(io_tlb_overflow);
+       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
+       memset(vaddr, 0, bytes);
+}
+
+int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+{
+       void *v_overflow_buffer;
+       unsigned long i, bytes;
+
+       bytes = nslabs << IO_TLB_SHIFT;
+
+       io_tlb_nslabs = nslabs;
+       io_tlb_start = __pa(tlb);
+       io_tlb_end = io_tlb_start + bytes;
+
+       /*
+        * Get the overflow emergency buffer
+        */
+       v_overflow_buffer = memblock_virt_alloc_low_nopanic(
+                                               PAGE_ALIGN(io_tlb_overflow),
+                                               PAGE_SIZE);
+       if (!v_overflow_buffer)
+               return -ENOMEM;
+
+       io_tlb_overflow_buffer = __pa(v_overflow_buffer);
+
+       /*
+        * Allocate and initialize the free list array.  This array is used
+        * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+        * between io_tlb_start and io_tlb_end.
+        */
+       io_tlb_list = memblock_virt_alloc(
+                               PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
+                               PAGE_SIZE);
+       io_tlb_orig_addr = memblock_virt_alloc(
+                               PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
+                               PAGE_SIZE);
+       for (i = 0; i < io_tlb_nslabs; i++) {
+               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+               io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+       }
+       io_tlb_index = 0;
+
+       if (verbose)
+               swiotlb_print_info();
+
+       swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+       return 0;
+}
+
+/*
+ * Statically reserve bounce buffer space and initialize bounce buffer data
+ * structures for the software IO TLB used to implement the DMA API.
+ */
+void  __init
+swiotlb_init(int verbose)
+{
+       size_t default_size = IO_TLB_DEFAULT_SIZE;
+       unsigned char *vstart;
+       unsigned long bytes;
+
+       if (!io_tlb_nslabs) {
+               io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+       }
+
+       bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+       /* Get IO TLB memory from the low pages */
+       vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
+       if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+               return;
+
+       if (io_tlb_start)
+               memblock_free_early(io_tlb_start,
+                                   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+       pr_warn("Cannot allocate SWIOTLB buffer");
+       no_iotlb_memory = true;
+}
+
+/*
+ * Systems with larger DMA zones (those that don't support ISA) can
+ * initialize the swiotlb later using the slab allocator if needed.
+ * This should be just like above, but with some error catching.
+ */
+int
+swiotlb_late_init_with_default_size(size_t default_size)
+{
+       unsigned long bytes, req_nslabs = io_tlb_nslabs;
+       unsigned char *vstart = NULL;
+       unsigned int order;
+       int rc = 0;
+
+       if (!io_tlb_nslabs) {
+               io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
+               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+       }
+
+       /*
+        * Get IO TLB memory from the low pages
+        */
+       order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
+       io_tlb_nslabs = SLABS_PER_PAGE << order;
+       bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+
+       while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+               vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+                                                 order);
+               if (vstart)
+                       break;
+               order--;
+       }
+
+       if (!vstart) {
+               io_tlb_nslabs = req_nslabs;
+               return -ENOMEM;
+       }
+       if (order != get_order(bytes)) {
+               printk(KERN_WARNING "Warning: only able to allocate %ld MB "
+                      "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
+               io_tlb_nslabs = SLABS_PER_PAGE << order;
+       }
+       rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+       if (rc)
+               free_pages((unsigned long)vstart, order);
+
+       return rc;
+}
+
+int
+swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+{
+       unsigned long i, bytes;
+       unsigned char *v_overflow_buffer;
+
+       bytes = nslabs << IO_TLB_SHIFT;
+
+       io_tlb_nslabs = nslabs;
+       io_tlb_start = virt_to_phys(tlb);
+       io_tlb_end = io_tlb_start + bytes;
+
+       set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
+       memset(tlb, 0, bytes);
+
+       /*
+        * Get the overflow emergency buffer
+        */
+       v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
+                                                    get_order(io_tlb_overflow));
+       if (!v_overflow_buffer)
+               goto cleanup2;
+
+       set_memory_decrypted((unsigned long)v_overflow_buffer,
+                       io_tlb_overflow >> PAGE_SHIFT);
+       memset(v_overflow_buffer, 0, io_tlb_overflow);
+       io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
+
+       /*
+        * Allocate and initialize the free list array.  This array is used
+        * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
+        * between io_tlb_start and io_tlb_end.
+        */
+       io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
+                                     get_order(io_tlb_nslabs * sizeof(int)));
+       if (!io_tlb_list)
+               goto cleanup3;
+
+       io_tlb_orig_addr = (phys_addr_t *)
+               __get_free_pages(GFP_KERNEL,
+                                get_order(io_tlb_nslabs *
+                                          sizeof(phys_addr_t)));
+       if (!io_tlb_orig_addr)
+               goto cleanup4;
+
+       for (i = 0; i < io_tlb_nslabs; i++) {
+               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+               io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+       }
+       io_tlb_index = 0;
+
+       swiotlb_print_info();
+
+       late_alloc = 1;
+
+       swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+
+       return 0;
+
+cleanup4:
+       free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+                                                        sizeof(int)));
+       io_tlb_list = NULL;
+cleanup3:
+       free_pages((unsigned long)v_overflow_buffer,
+                  get_order(io_tlb_overflow));
+       io_tlb_overflow_buffer = 0;
+cleanup2:
+       io_tlb_end = 0;
+       io_tlb_start = 0;
+       io_tlb_nslabs = 0;
+       max_segment = 0;
+       return -ENOMEM;
+}
+
+void __init swiotlb_exit(void)
+{
+       if (!io_tlb_orig_addr)
+               return;
+
+       if (late_alloc) {
+               free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
+                          get_order(io_tlb_overflow));
+               free_pages((unsigned long)io_tlb_orig_addr,
+                          get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
+               free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
+                                                                sizeof(int)));
+               free_pages((unsigned long)phys_to_virt(io_tlb_start),
+                          get_order(io_tlb_nslabs << IO_TLB_SHIFT));
+       } else {
+               memblock_free_late(io_tlb_overflow_buffer,
+                                  PAGE_ALIGN(io_tlb_overflow));
+               memblock_free_late(__pa(io_tlb_orig_addr),
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+               memblock_free_late(__pa(io_tlb_list),
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
+               memblock_free_late(io_tlb_start,
+                                  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+       }
+       io_tlb_nslabs = 0;
+       max_segment = 0;
+}
+
+int is_swiotlb_buffer(phys_addr_t paddr)
+{
+       return paddr >= io_tlb_start && paddr < io_tlb_end;
+}
+
+/*
+ * Bounce: copy the swiotlb buffer back to the original dma location
+ */
+static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
+                          size_t size, enum dma_data_direction dir)
+{
+       unsigned long pfn = PFN_DOWN(orig_addr);
+       unsigned char *vaddr = phys_to_virt(tlb_addr);
+
+       if (PageHighMem(pfn_to_page(pfn))) {
+               /* The buffer does not have a mapping.  Map it in and copy */
+               unsigned int offset = orig_addr & ~PAGE_MASK;
+               char *buffer;
+               unsigned int sz = 0;
+               unsigned long flags;
+
+               while (size) {
+                       sz = min_t(size_t, PAGE_SIZE - offset, size);
+
+                       local_irq_save(flags);
+                       buffer = kmap_atomic(pfn_to_page(pfn));
+                       if (dir == DMA_TO_DEVICE)
+                               memcpy(vaddr, buffer + offset, sz);
+                       else
+                               memcpy(buffer + offset, vaddr, sz);
+                       kunmap_atomic(buffer);
+                       local_irq_restore(flags);
+
+                       size -= sz;
+                       pfn++;
+                       vaddr += sz;
+                       offset = 0;
+               }
+       } else if (dir == DMA_TO_DEVICE) {
+               memcpy(vaddr, phys_to_virt(orig_addr), size);
+       } else {
+               memcpy(phys_to_virt(orig_addr), vaddr, size);
+       }
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
+                                  dma_addr_t tbl_dma_addr,
+                                  phys_addr_t orig_addr, size_t size,
+                                  enum dma_data_direction dir,
+                                  unsigned long attrs)
+{
+       unsigned long flags;
+       phys_addr_t tlb_addr;
+       unsigned int nslots, stride, index, wrap;
+       int i;
+       unsigned long mask;
+       unsigned long offset_slots;
+       unsigned long max_slots;
+
+       if (no_iotlb_memory)
+               panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+       if (mem_encrypt_active())
+               pr_warn_once("%s is active and system is using DMA bounce buffers\n",
+                            sme_active() ? "SME" : "SEV");
+
+       mask = dma_get_seg_boundary(hwdev);
+
+       tbl_dma_addr &= mask;
+
+       offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+
+       /*
+        * Carefully handle integer overflow which can occur when mask == ~0UL.
+        */
+       max_slots = mask + 1
+                   ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
+                   : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+
+       /*
+        * For mappings greater than or equal to a page, we limit the stride
+        * (and hence alignment) to a page size.
+        */
+       nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+       if (size >= PAGE_SIZE)
+               stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
+       else
+               stride = 1;
+
+       BUG_ON(!nslots);
+
+       /*
+        * Find suitable number of IO TLB entries size that will fit this
+        * request and allocate a buffer from that IO TLB pool.
+        */
+       spin_lock_irqsave(&io_tlb_lock, flags);
+       index = ALIGN(io_tlb_index, stride);
+       if (index >= io_tlb_nslabs)
+               index = 0;
+       wrap = index;
+
+       do {
+               while (iommu_is_span_boundary(index, nslots, offset_slots,
+                                             max_slots)) {
+                       index += stride;
+                       if (index >= io_tlb_nslabs)
+                               index = 0;
+                       if (index == wrap)
+                               goto not_found;
+               }
+
+               /*
+                * If we find a slot that indicates we have 'nslots' number of
+                * contiguous buffers, we allocate the buffers from that slot
+                * and mark the entries as '0' indicating unavailable.
+                */
+               if (io_tlb_list[index] >= nslots) {
+                       int count = 0;
+
+                       for (i = index; i < (int) (index + nslots); i++)
+                               io_tlb_list[i] = 0;
+                       for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
+                               io_tlb_list[i] = ++count;
+                       tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
+
+                       /*
+                        * Update the indices to avoid searching in the next
+                        * round.
+                        */
+                       io_tlb_index = ((index + nslots) < io_tlb_nslabs
+                                       ? (index + nslots) : 0);
+
+                       goto found;
+               }
+               index += stride;
+               if (index >= io_tlb_nslabs)
+                       index = 0;
+       } while (index != wrap);
+
+not_found:
+       spin_unlock_irqrestore(&io_tlb_lock, flags);
+       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
+               dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
+       return SWIOTLB_MAP_ERROR;
+found:
+       spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+       /*
+        * Save away the mapping from the original address to the DMA address.
+        * This is needed when we sync the memory.  Then we sync the buffer if
+        * needed.
+        */
+       for (i = 0; i < nslots; i++)
+               io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+           (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+               swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
+
+       return tlb_addr;
+}
+
+/*
+ * Allocates bounce buffer and returns its physical address.
+ */
+static phys_addr_t
+map_single(struct device *hwdev, phys_addr_t phys, size_t size,
+          enum dma_data_direction dir, unsigned long attrs)
+{
+       dma_addr_t start_dma_addr;
+
+       if (swiotlb_force == SWIOTLB_NO_FORCE) {
+               dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
+                                    &phys);
+               return SWIOTLB_MAP_ERROR;
+       }
+
+       start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
+       return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
+                                     dir, attrs);
+}
+
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
+                             size_t size, enum dma_data_direction dir,
+                             unsigned long attrs)
+{
+       unsigned long flags;
+       int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+       int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+       phys_addr_t orig_addr = io_tlb_orig_addr[index];
+
+       /*
+        * First, sync the memory before unmapping the entry
+        */
+       if (orig_addr != INVALID_PHYS_ADDR &&
+           !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+           ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
+               swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
+
+       /*
+        * Return the buffer to the free list by setting the corresponding
+        * entries to indicate the number of contiguous entries available.
+        * While returning the entries to the free list, we merge the entries
+        * with slots below and above the pool being returned.
+        */
+       spin_lock_irqsave(&io_tlb_lock, flags);
+       {
+               count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
+                        io_tlb_list[index + nslots] : 0);
+               /*
+                * Step 1: return the slots to the free list, merging the
+                * slots with superceeding slots
+                */
+               for (i = index + nslots - 1; i >= index; i--) {
+                       io_tlb_list[i] = ++count;
+                       io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+               }
+               /*
+                * Step 2: merge the returned slots with the preceding slots,
+                * if available (non zero)
+                */
+               for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
+                       io_tlb_list[i] = ++count;
+       }
+       spin_unlock_irqrestore(&io_tlb_lock, flags);
+}
+
+void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
+                            size_t size, enum dma_data_direction dir,
+                            enum dma_sync_target target)
+{
+       int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+       phys_addr_t orig_addr = io_tlb_orig_addr[index];
+
+       if (orig_addr == INVALID_PHYS_ADDR)
+               return;
+       orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+       switch (target) {
+       case SYNC_FOR_CPU:
+               if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+                       swiotlb_bounce(orig_addr, tlb_addr,
+                                      size, DMA_FROM_DEVICE);
+               else
+                       BUG_ON(dir != DMA_TO_DEVICE);
+               break;
+       case SYNC_FOR_DEVICE:
+               if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
+                       swiotlb_bounce(orig_addr, tlb_addr,
+                                      size, DMA_TO_DEVICE);
+               else
+                       BUG_ON(dir != DMA_FROM_DEVICE);
+               break;
+       default:
+               BUG();
+       }
+}
+
+static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
+               size_t size)
+{
+       u64 mask = DMA_BIT_MASK(32);
+
+       if (dev && dev->coherent_dma_mask)
+               mask = dev->coherent_dma_mask;
+       return addr + size - 1 <= mask;
+}
+
+static void *
+swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               unsigned long attrs)
+{
+       phys_addr_t phys_addr;
+
+       if (swiotlb_force == SWIOTLB_NO_FORCE)
+               goto out_warn;
+
+       phys_addr = swiotlb_tbl_map_single(dev,
+                       __phys_to_dma(dev, io_tlb_start),
+                       0, size, DMA_FROM_DEVICE, attrs);
+       if (phys_addr == SWIOTLB_MAP_ERROR)
+               goto out_warn;
+
+       *dma_handle = __phys_to_dma(dev, phys_addr);
+       if (!dma_coherent_ok(dev, *dma_handle, size))
+               goto out_unmap;
+
+       memset(phys_to_virt(phys_addr), 0, size);
+       return phys_to_virt(phys_addr);
+
+out_unmap:
+       dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
+               (unsigned long long)dev->coherent_dma_mask,
+               (unsigned long long)*dma_handle);
+
+       /*
+        * DMA_TO_DEVICE to avoid memcpy in unmap_single.
+        * DMA_ATTR_SKIP_CPU_SYNC is optional.
+        */
+       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
+                       DMA_ATTR_SKIP_CPU_SYNC);
+out_warn:
+       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
+               dev_warn(dev,
+                       "swiotlb: coherent allocation failed, size=%zu\n",
+                       size);
+               dump_stack();
+       }
+       return NULL;
+}
+
+static bool swiotlb_free_buffer(struct device *dev, size_t size,
+               dma_addr_t dma_addr)
+{
+       phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
+
+       WARN_ON_ONCE(irqs_disabled());
+
+       if (!is_swiotlb_buffer(phys_addr))
+               return false;
+
+       /*
+        * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
+        * DMA_ATTR_SKIP_CPU_SYNC is optional.
+        */
+       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
+                                DMA_ATTR_SKIP_CPU_SYNC);
+       return true;
+}
+
+static void
+swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
+            int do_panic)
+{
+       if (swiotlb_force == SWIOTLB_NO_FORCE)
+               return;
+
+       /*
+        * Ran out of IOMMU space for this operation. This is very bad.
+        * Unfortunately the drivers cannot handle this operation properly.
+        * unless they check for dma_mapping_error (most don't)
+        * When the mapping is small enough return a static buffer to limit
+        * the damage, or panic when the transfer is too big.
+        */
+       dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
+                           size);
+
+       if (size <= io_tlb_overflow || !do_panic)
+               return;
+
+       if (dir == DMA_BIDIRECTIONAL)
+               panic("DMA: Random memory could be DMA accessed\n");
+       if (dir == DMA_FROM_DEVICE)
+               panic("DMA: Random memory could be DMA written\n");
+       if (dir == DMA_TO_DEVICE)
+               panic("DMA: Random memory could be DMA read\n");
+}
+
+/*
+ * Map a single buffer of the indicated size for DMA in streaming mode.  The
+ * physical address to use is returned.
+ *
+ * Once the device is given the dma address, the device owns this memory until
+ * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
+ */
+dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
+                           unsigned long offset, size_t size,
+                           enum dma_data_direction dir,
+                           unsigned long attrs)
+{
+       phys_addr_t map, phys = page_to_phys(page) + offset;
+       dma_addr_t dev_addr = phys_to_dma(dev, phys);
+
+       BUG_ON(dir == DMA_NONE);
+       /*
+        * If the address happens to be in the device's DMA window,
+        * we can safely return the device addr and not worry about bounce
+        * buffering it.
+        */
+       if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
+               return dev_addr;
+
+       trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+
+       /* Oh well, have to allocate and map a bounce buffer. */
+       map = map_single(dev, phys, size, dir, attrs);
+       if (map == SWIOTLB_MAP_ERROR) {
+               swiotlb_full(dev, size, dir, 1);
+               return __phys_to_dma(dev, io_tlb_overflow_buffer);
+       }
+
+       dev_addr = __phys_to_dma(dev, map);
+
+       /* Ensure that the address returned is DMA'ble */
+       if (dma_capable(dev, dev_addr, size))
+               return dev_addr;
+
+       attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+       swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
+
+       return __phys_to_dma(dev, io_tlb_overflow_buffer);
+}
+
+/*
+ * Unmap a single streaming mode DMA translation.  The dma_addr and size must
+ * match what was provided for in a previous swiotlb_map_page call.  All
+ * other usages are undefined.
+ *
+ * After this call, reads by the cpu to the buffer are guaranteed to see
+ * whatever the device wrote there.
+ */
+static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
+                        size_t size, enum dma_data_direction dir,
+                        unsigned long attrs)
+{
+       phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
+       BUG_ON(dir == DMA_NONE);
+
+       if (is_swiotlb_buffer(paddr)) {
+               swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
+               return;
+       }
+
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       /*
+        * phys_to_virt doesn't work with hihgmem page but we could
+        * call dma_mark_clean() with hihgmem page here. However, we
+        * are fine since dma_mark_clean() is null on POWERPC. We can
+        * make dma_mark_clean() take a physical address if necessary.
+        */
+       dma_mark_clean(phys_to_virt(paddr), size);
+}
+
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+                       size_t size, enum dma_data_direction dir,
+                       unsigned long attrs)
+{
+       unmap_single(hwdev, dev_addr, size, dir, attrs);
+}
+
+/*
+ * Make physical memory consistent for a single streaming mode DMA translation
+ * after a transfer.
+ *
+ * If you perform a swiotlb_map_page() but wish to interrogate the buffer
+ * using the cpu, yet do not wish to teardown the dma mapping, you must
+ * call this function before doing so.  At the next point you give the dma
+ * address back to the card, you must first perform a
+ * swiotlb_dma_sync_for_device, and then the device again owns the buffer
+ */
+static void
+swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
+                   size_t size, enum dma_data_direction dir,
+                   enum dma_sync_target target)
+{
+       phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
+
+       BUG_ON(dir == DMA_NONE);
+
+       if (is_swiotlb_buffer(paddr)) {
+               swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
+               return;
+       }
+
+       if (dir != DMA_FROM_DEVICE)
+               return;
+
+       dma_mark_clean(phys_to_virt(paddr), size);
+}
+
+void
+swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
+                           size_t size, enum dma_data_direction dir)
+{
+       swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
+                              size_t size, enum dma_data_direction dir)
+{
+       swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
+}
+
+/*
+ * Map a set of buffers described by scatterlist in streaming mode for DMA.
+ * This is the scatter-gather version of the above swiotlb_map_page
+ * interface.  Here the scatter gather list elements are each tagged with the
+ * appropriate dma address and length.  They are obtained via
+ * sg_dma_{address,length}(SG).
+ *
+ * NOTE: An implementation may be able to use a smaller number of
+ *       DMA address/length pairs than there are SG table elements.
+ *       (for example via virtual mapping capabilities)
+ *       The routine returns the number of addr/length pairs actually
+ *       used, at most nents.
+ *
+ * Device ownership issues as mentioned above for swiotlb_map_page are the
+ * same here.
+ */
+int
+swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+                    enum dma_data_direction dir, unsigned long attrs)
+{
+       struct scatterlist *sg;
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for_each_sg(sgl, sg, nelems, i) {
+               phys_addr_t paddr = sg_phys(sg);
+               dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
+
+               if (swiotlb_force == SWIOTLB_FORCE ||
+                   !dma_capable(hwdev, dev_addr, sg->length)) {
+                       phys_addr_t map = map_single(hwdev, sg_phys(sg),
+                                                    sg->length, dir, attrs);
+                       if (map == SWIOTLB_MAP_ERROR) {
+                               /* Don't panic here, we expect map_sg users
+                                  to do proper error handling. */
+                               swiotlb_full(hwdev, sg->length, dir, 0);
+                               attrs |= DMA_ATTR_SKIP_CPU_SYNC;
+                               swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
+                                                      attrs);
+                               sg_dma_len(sgl) = 0;
+                               return 0;
+                       }
+                       sg->dma_address = __phys_to_dma(hwdev, map);
+               } else
+                       sg->dma_address = dev_addr;
+               sg_dma_len(sg) = sg->length;
+       }
+       return nelems;
+}
+
+/*
+ * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
+ * concerning calls here are the same as for swiotlb_unmap_page() above.
+ */
+void
+swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
+                      int nelems, enum dma_data_direction dir,
+                      unsigned long attrs)
+{
+       struct scatterlist *sg;
+       int i;
+
+       BUG_ON(dir == DMA_NONE);
+
+       for_each_sg(sgl, sg, nelems, i)
+               unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+                            attrs);
+}
+
+/*
+ * Make physical memory consistent for a set of streaming mode DMA translations
+ * after a transfer.
+ *
+ * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
+ * and usage.
+ */
+static void
+swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
+               int nelems, enum dma_data_direction dir,
+               enum dma_sync_target target)
+{
+       struct scatterlist *sg;
+       int i;
+
+       for_each_sg(sgl, sg, nelems, i)
+               swiotlb_sync_single(hwdev, sg->dma_address,
+                                   sg_dma_len(sg), dir, target);
+}
+
+void
+swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
+                       int nelems, enum dma_data_direction dir)
+{
+       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
+}
+
+void
+swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
+                          int nelems, enum dma_data_direction dir)
+{
+       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
+}
+
+int
+swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
+{
+       return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
+}
+
+/*
+ * Return whether the given device DMA address mask can be supported
+ * properly.  For example, if your device can only drive the low 24-bits
+ * during bus mastering, then you would pass 0x00ffffff as the mask to
+ * this function.
+ */
+int
+swiotlb_dma_supported(struct device *hwdev, u64 mask)
+{
+       return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+}
+
+void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+               gfp_t gfp, unsigned long attrs)
+{
+       void *vaddr;
+
+       /* temporary workaround: */
+       if (gfp & __GFP_NOWARN)
+               attrs |= DMA_ATTR_NO_WARN;
+
+       /*
+        * Don't print a warning when the first allocation attempt fails.
+        * swiotlb_alloc_coherent() will print a warning when the DMA memory
+        * allocation ultimately failed.
+        */
+       gfp |= __GFP_NOWARN;
+
+       vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
+       if (!vaddr)
+               vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
+       return vaddr;
+}
+
+void swiotlb_free(struct device *dev, size_t size, void *vaddr,
+               dma_addr_t dma_addr, unsigned long attrs)
+{
+       if (!swiotlb_free_buffer(dev, size, dma_addr))
+               dma_direct_free(dev, size, vaddr, dma_addr, attrs);
+}
+
+const struct dma_map_ops swiotlb_dma_ops = {
+       .mapping_error          = swiotlb_dma_mapping_error,
+       .alloc                  = swiotlb_alloc,
+       .free                   = swiotlb_free,
+       .sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
+       .sync_single_for_device = swiotlb_sync_single_for_device,
+       .sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
+       .sync_sg_for_device     = swiotlb_sync_sg_for_device,
+       .map_sg                 = swiotlb_map_sg_attrs,
+       .unmap_sg               = swiotlb_unmap_sg_attrs,
+       .map_page               = swiotlb_map_page,
+       .unmap_page             = swiotlb_unmap_page,
+       .dma_supported          = dma_direct_supported,
+};
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c
new file mode 100644 (file)
index 0000000..631ddec
--- /dev/null
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DMA operations that map to virtual addresses without flushing memory.
+ */
+#include <linux/export.h>
+#include <linux/mm.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
+static void *dma_virt_alloc(struct device *dev, size_t size,
+                           dma_addr_t *dma_handle, gfp_t gfp,
+                           unsigned long attrs)
+{
+       void *ret;
+
+       ret = (void *)__get_free_pages(gfp, get_order(size));
+       if (ret)
+               *dma_handle = (uintptr_t)ret;
+       return ret;
+}
+
+static void dma_virt_free(struct device *dev, size_t size,
+                         void *cpu_addr, dma_addr_t dma_addr,
+                         unsigned long attrs)
+{
+       free_pages((unsigned long)cpu_addr, get_order(size));
+}
+
+static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
+                                   unsigned long offset, size_t size,
+                                   enum dma_data_direction dir,
+                                   unsigned long attrs)
+{
+       return (uintptr_t)(page_address(page) + offset);
+}
+
+static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
+                          int nents, enum dma_data_direction dir,
+                          unsigned long attrs)
+{
+       int i;
+       struct scatterlist *sg;
+
+       for_each_sg(sgl, sg, nents, i) {
+               BUG_ON(!sg_page(sg));
+               sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
+               sg_dma_len(sg) = sg->length;
+       }
+
+       return nents;
+}
+
+const struct dma_map_ops dma_virt_ops = {
+       .alloc                  = dma_virt_alloc,
+       .free                   = dma_virt_free,
+       .map_page               = dma_virt_map_page,
+       .map_sg                 = dma_virt_map_sg,
+};
+EXPORT_SYMBOL(dma_virt_ops);
index 4dadeb3..6f63613 100644 (file)
@@ -55,6 +55,7 @@ static const struct irq_bit_descr irqchip_flags[] = {
        BIT_MASK_DESCR(IRQCHIP_SKIP_SET_WAKE),
        BIT_MASK_DESCR(IRQCHIP_ONESHOT_SAFE),
        BIT_MASK_DESCR(IRQCHIP_EOI_THREADED),
+       BIT_MASK_DESCR(IRQCHIP_SUPPORTS_LEVEL_MSI),
 };
 
 static void
index edcac5d..5fa4d31 100644 (file)
@@ -1265,11 +1265,11 @@ unsigned long lockdep_count_forward_deps(struct lock_class *class)
        this.parent = NULL;
        this.class = class;
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
        arch_spin_lock(&lockdep_lock);
        ret = __lockdep_count_forward_deps(&this);
        arch_spin_unlock(&lockdep_lock);
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 
        return ret;
 }
@@ -1292,11 +1292,11 @@ unsigned long lockdep_count_backward_deps(struct lock_class *class)
        this.parent = NULL;
        this.class = class;
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
        arch_spin_lock(&lockdep_lock);
        ret = __lockdep_count_backward_deps(&this);
        arch_spin_unlock(&lockdep_lock);
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 
        return ret;
 }
@@ -4411,7 +4411,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
        if (unlikely(!debug_locks))
                return;
 
-       local_irq_save(flags);
+       raw_local_irq_save(flags);
        for (i = 0; i < curr->lockdep_depth; i++) {
                hlock = curr->held_locks + i;
 
@@ -4422,7 +4422,7 @@ void debug_check_no_locks_freed(const void *mem_from, unsigned long mem_len)
                print_freed_lock_bug(curr, mem_from, mem_from + mem_len, hlock);
                break;
        }
-       local_irq_restore(flags);
+       raw_local_irq_restore(flags);
 }
 EXPORT_SYMBOL_GPL(debug_check_no_locks_freed);
 
index bc1e507..776308d 100644 (file)
@@ -181,6 +181,7 @@ void down_read_non_owner(struct rw_semaphore *sem)
        might_sleep();
 
        __down_read(sem);
+       rwsem_set_reader_owned(sem);
 }
 
 EXPORT_SYMBOL(down_read_non_owner);
index de2f57f..900dcfe 100644 (file)
@@ -139,9 +139,13 @@ static void __local_bh_enable(unsigned int cnt)
 {
        lockdep_assert_irqs_disabled();
 
+       if (preempt_count() == cnt)
+               trace_preempt_on(CALLER_ADDR0, get_lock_parent_ip());
+
        if (softirq_count() == (cnt & SOFTIRQ_MASK))
                trace_softirqs_on(_RET_IP_);
-       preempt_count_sub(cnt);
+
+       __preempt_count_sub(cnt);
 }
 
 /*
index 055a4a7..3e93c54 100644 (file)
@@ -1659,7 +1659,7 @@ EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
 {
        switch(restart->nanosleep.type) {
-#ifdef CONFIG_COMPAT
+#ifdef CONFIG_COMPAT_32BIT_TIME
        case TT_COMPAT:
                if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
                        return -EFAULT;
index 5a6251a..9cdf54b 100644 (file)
@@ -604,7 +604,6 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
        /*
         * Disarm any old timer after extracting its expiry time.
         */
-       lockdep_assert_irqs_disabled();
 
        ret = 0;
        old_incr = timer->it.cpu.incr;
@@ -1049,7 +1048,6 @@ static void posix_cpu_timer_rearm(struct k_itimer *timer)
        /*
         * Now re-arm for the new expiry time.
         */
-       lockdep_assert_irqs_disabled();
        arm_timer(timer);
 unlock:
        unlock_task_sighand(p, &flags);
index 6fa9921..2b41e8e 100644 (file)
@@ -28,6 +28,7 @@
  */
 
 #include <linux/export.h>
+#include <linux/kernel.h>
 #include <linux/timex.h>
 #include <linux/capability.h>
 #include <linux/timekeeper_internal.h>
@@ -314,9 +315,10 @@ unsigned int jiffies_to_msecs(const unsigned long j)
        return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC);
 #else
 # if BITS_PER_LONG == 32
-       return (HZ_TO_MSEC_MUL32 * j) >> HZ_TO_MSEC_SHR32;
+       return (HZ_TO_MSEC_MUL32 * j + (1ULL << HZ_TO_MSEC_SHR32) - 1) >>
+              HZ_TO_MSEC_SHR32;
 # else
-       return (j * HZ_TO_MSEC_NUM) / HZ_TO_MSEC_DEN;
+       return DIV_ROUND_UP(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
 # endif
 #endif
 }
index c9336e9..a0079b4 100644 (file)
@@ -1360,8 +1360,6 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 void
 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 {
-       struct ring_buffer *buf;
-
        if (tr->stop_count)
                return;
 
@@ -1375,9 +1373,7 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
 
        arch_spin_lock(&tr->max_lock);
 
-       buf = tr->trace_buffer.buffer;
-       tr->trace_buffer.buffer = tr->max_buffer.buffer;
-       tr->max_buffer.buffer = buf;
+       swap(tr->trace_buffer.buffer, tr->max_buffer.buffer);
 
        __update_max_tr(tr, tsk, cpu);
        arch_spin_unlock(&tr->max_lock);
index e1c818d..0dceb77 100644 (file)
@@ -78,7 +78,8 @@ static const char * ops[] = { OPS };
        C(TOO_MANY_PREDS,       "Too many terms in predicate expression"), \
        C(INVALID_FILTER,       "Meaningless filter expression"),       \
        C(IP_FIELD_ONLY,        "Only 'ip' field is supported for function trace"), \
-       C(INVALID_VALUE,        "Invalid value (did you forget quotes)?"),
+       C(INVALID_VALUE,        "Invalid value (did you forget quotes)?"), \
+       C(NO_FILTER,            "No filter found"),
 
 #undef C
 #define C(a, b)                FILT_ERR_##a
@@ -550,6 +551,13 @@ predicate_parse(const char *str, int nr_parens, int nr_preds,
                goto out_free;
        }
 
+       if (!N) {
+               /* No program? */
+               ret = -EINVAL;
+               parse_error(pe, FILT_ERR_NO_FILTER, ptr - str);
+               goto out_free;
+       }
+
        prog[N].pred = NULL;                                    /* #13 */
        prog[N].target = 1;             /* TRUE */
        prog[N+1].pred = NULL;
index e34b04b..706836e 100644 (file)
@@ -420,60 +420,15 @@ config HAS_IOPORT_MAP
        depends on HAS_IOMEM && !NO_IOPORT_MAP
        default y
 
-config HAS_DMA
-       bool
-       depends on !NO_DMA
-       default y
+source "kernel/dma/Kconfig"
 
 config SGL_ALLOC
        bool
        default n
 
-config NEED_SG_DMA_LENGTH
-       bool
-
-config NEED_DMA_MAP_STATE
-       bool
-
-config ARCH_DMA_ADDR_T_64BIT
-       def_bool 64BIT || PHYS_ADDR_T_64BIT
-
 config IOMMU_HELPER
        bool
 
-config ARCH_HAS_SYNC_DMA_FOR_DEVICE
-       bool
-
-config ARCH_HAS_SYNC_DMA_FOR_CPU
-       bool
-       select NEED_DMA_MAP_STATE
-
-config DMA_DIRECT_OPS
-       bool
-       depends on HAS_DMA
-
-config DMA_NONCOHERENT_OPS
-       bool
-       depends on HAS_DMA
-       select DMA_DIRECT_OPS
-
-config DMA_NONCOHERENT_MMAP
-       bool
-       depends on DMA_NONCOHERENT_OPS
-
-config DMA_NONCOHERENT_CACHE_SYNC
-       bool
-       depends on DMA_NONCOHERENT_OPS
-
-config DMA_VIRT_OPS
-       bool
-       depends on HAS_DMA
-
-config SWIOTLB
-       bool
-       select DMA_DIRECT_OPS
-       select NEED_DMA_MAP_STATE
-
 config CHECK_SIGNATURE
        bool
 
index 956b320..90dc552 100644 (file)
@@ -23,15 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
         sha1.o chacha20.o irq_regs.o argv_split.o \
         flex_proportions.o ratelimit.o show_mem.o \
         is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-        earlycpio.o seq_buf.o siphash.o \
+        earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
         nmi_backtrace.o nodemask.o win_minmax.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
-lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o
-lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o
-lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o
 
 lib-y  += kobject.o klist.o
 obj-y  += lockref.o
@@ -98,10 +95,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
 
-ifneq ($(CONFIG_HAVE_DEC_LOCK),y)
-  lib-y += dec_and_lock.o
-endif
-
 obj-$(CONFIG_BITREVERSE) += bitrev.o
 obj-$(CONFIG_RATIONAL) += rational.o
 obj-$(CONFIG_CRC_CCITT)        += crc-ccitt.o
@@ -148,7 +141,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o
 obj-$(CONFIG_AUDIT_GENERIC) += audit.o
 obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
-obj-$(CONFIG_SWIOTLB) += swiotlb.o
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
@@ -169,8 +161,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o
 
 obj-$(CONFIG_LRU_CACHE) += lru_cache.o
 
-obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o
-
 obj-$(CONFIG_GENERIC_CSUM) += checksum.o
 
 obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o
index 347fa7a..9555b68 100644 (file)
@@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
 }
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);
+
+int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock,
+                                unsigned long *flags)
+{
+       /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */
+       if (atomic_add_unless(atomic, -1, 1))
+               return 0;
+
+       /* Otherwise do it the slow way */
+       spin_lock_irqsave(lock, *flags);
+       if (atomic_dec_and_test(atomic))
+               return 1;
+       spin_unlock_irqrestore(lock, *flags);
+       return 0;
+}
+EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave);
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
deleted file mode 100644 (file)
index c007d25..0000000
+++ /dev/null
@@ -1,1773 +0,0 @@
-/*
- * Copyright (C) 2008 Advanced Micro Devices, Inc.
- *
- * Author: Joerg Roedel <joerg.roedel@amd.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 as published
- * by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- */
-
-#include <linux/sched/task_stack.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
-#include <linux/sched/task.h>
-#include <linux/stacktrace.h>
-#include <linux/dma-debug.h>
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-#include <linux/debugfs.h>
-#include <linux/uaccess.h>
-#include <linux/export.h>
-#include <linux/device.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/ctype.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-
-#include <asm/sections.h>
-
-#define HASH_SIZE       1024ULL
-#define HASH_FN_SHIFT   13
-#define HASH_FN_MASK    (HASH_SIZE - 1)
-
-/* allow architectures to override this if absolutely required */
-#ifndef PREALLOC_DMA_DEBUG_ENTRIES
-#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16)
-#endif
-
-enum {
-       dma_debug_single,
-       dma_debug_page,
-       dma_debug_sg,
-       dma_debug_coherent,
-       dma_debug_resource,
-};
-
-enum map_err_types {
-       MAP_ERR_CHECK_NOT_APPLICABLE,
-       MAP_ERR_NOT_CHECKED,
-       MAP_ERR_CHECKED,
-};
-
-#define DMA_DEBUG_STACKTRACE_ENTRIES 5
-
-/**
- * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
- * @list: node on pre-allocated free_entries list
- * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
- * @type: single, page, sg, coherent
- * @pfn: page frame of the start address
- * @offset: offset of mapping relative to pfn
- * @size: length of the mapping
- * @direction: enum dma_data_direction
- * @sg_call_ents: 'nents' from dma_map_sg
- * @sg_mapped_ents: 'mapped_ents' from dma_map_sg
- * @map_err_type: track whether dma_mapping_error() was checked
- * @stacktrace: support backtraces when a violation is detected
- */
-struct dma_debug_entry {
-       struct list_head list;
-       struct device    *dev;
-       int              type;
-       unsigned long    pfn;
-       size_t           offset;
-       u64              dev_addr;
-       u64              size;
-       int              direction;
-       int              sg_call_ents;
-       int              sg_mapped_ents;
-       enum map_err_types  map_err_type;
-#ifdef CONFIG_STACKTRACE
-       struct           stack_trace stacktrace;
-       unsigned long    st_entries[DMA_DEBUG_STACKTRACE_ENTRIES];
-#endif
-};
-
-typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *);
-
-struct hash_bucket {
-       struct list_head list;
-       spinlock_t lock;
-} ____cacheline_aligned_in_smp;
-
-/* Hash list to save the allocated dma addresses */
-static struct hash_bucket dma_entry_hash[HASH_SIZE];
-/* List of pre-allocated dma_debug_entry's */
-static LIST_HEAD(free_entries);
-/* Lock for the list above */
-static DEFINE_SPINLOCK(free_entries_lock);
-
-/* Global disable flag - will be set in case of an error */
-static bool global_disable __read_mostly;
-
-/* Early initialization disable flag, set at the end of dma_debug_init */
-static bool dma_debug_initialized __read_mostly;
-
-static inline bool dma_debug_disabled(void)
-{
-       return global_disable || !dma_debug_initialized;
-}
-
-/* Global error count */
-static u32 error_count;
-
-/* Global error show enable*/
-static u32 show_all_errors __read_mostly;
-/* Number of errors to show */
-static u32 show_num_errors = 1;
-
-static u32 num_free_entries;
-static u32 min_free_entries;
-static u32 nr_total_entries;
-
-/* number of preallocated entries requested by kernel cmdline */
-static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
-
-/* debugfs dentry's for the stuff above */
-static struct dentry *dma_debug_dent        __read_mostly;
-static struct dentry *global_disable_dent   __read_mostly;
-static struct dentry *error_count_dent      __read_mostly;
-static struct dentry *show_all_errors_dent  __read_mostly;
-static struct dentry *show_num_errors_dent  __read_mostly;
-static struct dentry *num_free_entries_dent __read_mostly;
-static struct dentry *min_free_entries_dent __read_mostly;
-static struct dentry *filter_dent           __read_mostly;
-
-/* per-driver filter related state */
-
-#define NAME_MAX_LEN   64
-
-static char                  current_driver_name[NAME_MAX_LEN] __read_mostly;
-static struct device_driver *current_driver                    __read_mostly;
-
-static DEFINE_RWLOCK(driver_name_lock);
-
-static const char *const maperr2str[] = {
-       [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable",
-       [MAP_ERR_NOT_CHECKED] = "dma map error not checked",
-       [MAP_ERR_CHECKED] = "dma map error checked",
-};
-
-static const char *type2name[5] = { "single", "page",
-                                   "scather-gather", "coherent",
-                                   "resource" };
-
-static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
-                                  "DMA_FROM_DEVICE", "DMA_NONE" };
-
-/*
- * The access to some variables in this macro is racy. We can't use atomic_t
- * here because all these variables are exported to debugfs. Some of them even
- * writeable. This is also the reason why a lock won't help much. But anyway,
- * the races are no big deal. Here is why:
- *
- *   error_count: the addition is racy, but the worst thing that can happen is
- *                that we don't count some errors
- *   show_num_errors: the subtraction is racy. Also no big deal because in
- *                    worst case this will result in one warning more in the
- *                    system log than the user configured. This variable is
- *                    writeable via debugfs.
- */
-static inline void dump_entry_trace(struct dma_debug_entry *entry)
-{
-#ifdef CONFIG_STACKTRACE
-       if (entry) {
-               pr_warning("Mapped at:\n");
-               print_stack_trace(&entry->stacktrace, 0);
-       }
-#endif
-}
-
-static bool driver_filter(struct device *dev)
-{
-       struct device_driver *drv;
-       unsigned long flags;
-       bool ret;
-
-       /* driver filter off */
-       if (likely(!current_driver_name[0]))
-               return true;
-
-       /* driver filter on and initialized */
-       if (current_driver && dev && dev->driver == current_driver)
-               return true;
-
-       /* driver filter on, but we can't filter on a NULL device... */
-       if (!dev)
-               return false;
-
-       if (current_driver || !current_driver_name[0])
-               return false;
-
-       /* driver filter on but not yet initialized */
-       drv = dev->driver;
-       if (!drv)
-               return false;
-
-       /* lock to protect against change of current_driver_name */
-       read_lock_irqsave(&driver_name_lock, flags);
-
-       ret = false;
-       if (drv->name &&
-           strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) {
-               current_driver = drv;
-               ret = true;
-       }
-
-       read_unlock_irqrestore(&driver_name_lock, flags);
-
-       return ret;
-}
-
-#define err_printk(dev, entry, format, arg...) do {                    \
-               error_count += 1;                                       \
-               if (driver_filter(dev) &&                               \
-                   (show_all_errors || show_num_errors > 0)) {         \
-                       WARN(1, "%s %s: " format,                       \
-                            dev ? dev_driver_string(dev) : "NULL",     \
-                            dev ? dev_name(dev) : "NULL", ## arg);     \
-                       dump_entry_trace(entry);                        \
-               }                                                       \
-               if (!show_all_errors && show_num_errors > 0)            \
-                       show_num_errors -= 1;                           \
-       } while (0);
-
-/*
- * Hash related functions
- *
- * Every DMA-API request is saved into a struct dma_debug_entry. To
- * have quick access to these structs they are stored into a hash.
- */
-static int hash_fn(struct dma_debug_entry *entry)
-{
-       /*
-        * Hash function is based on the dma address.
-        * We use bits 20-27 here as the index into the hash
-        */
-       return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK;
-}
-
-/*
- * Request exclusive access to a hash bucket for a given dma_debug_entry.
- */
-static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry,
-                                          unsigned long *flags)
-       __acquires(&dma_entry_hash[idx].lock)
-{
-       int idx = hash_fn(entry);
-       unsigned long __flags;
-
-       spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags);
-       *flags = __flags;
-       return &dma_entry_hash[idx];
-}
-
-/*
- * Give up exclusive access to the hash bucket
- */
-static void put_hash_bucket(struct hash_bucket *bucket,
-                           unsigned long *flags)
-       __releases(&bucket->lock)
-{
-       unsigned long __flags = *flags;
-
-       spin_unlock_irqrestore(&bucket->lock, __flags);
-}
-
-static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b)
-{
-       return ((a->dev_addr == b->dev_addr) &&
-               (a->dev == b->dev)) ? true : false;
-}
-
-static bool containing_match(struct dma_debug_entry *a,
-                            struct dma_debug_entry *b)
-{
-       if (a->dev != b->dev)
-               return false;
-
-       if ((b->dev_addr <= a->dev_addr) &&
-           ((b->dev_addr + b->size) >= (a->dev_addr + a->size)))
-               return true;
-
-       return false;
-}
-
-/*
- * Search a given entry in the hash bucket list
- */
-static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket,
-                                                 struct dma_debug_entry *ref,
-                                                 match_fn match)
-{
-       struct dma_debug_entry *entry, *ret = NULL;
-       int matches = 0, match_lvl, last_lvl = -1;
-
-       list_for_each_entry(entry, &bucket->list, list) {
-               if (!match(ref, entry))
-                       continue;
-
-               /*
-                * Some drivers map the same physical address multiple
-                * times. Without a hardware IOMMU this results in the
-                * same device addresses being put into the dma-debug
-                * hash multiple times too. This can result in false
-                * positives being reported. Therefore we implement a
-                * best-fit algorithm here which returns the entry from
-                * the hash which fits best to the reference value
-                * instead of the first-fit.
-                */
-               matches += 1;
-               match_lvl = 0;
-               entry->size         == ref->size         ? ++match_lvl : 0;
-               entry->type         == ref->type         ? ++match_lvl : 0;
-               entry->direction    == ref->direction    ? ++match_lvl : 0;
-               entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0;
-
-               if (match_lvl == 4) {
-                       /* perfect-fit - return the result */
-                       return entry;
-               } else if (match_lvl > last_lvl) {
-                       /*
-                        * We found an entry that fits better then the
-                        * previous one or it is the 1st match.
-                        */
-                       last_lvl = match_lvl;
-                       ret      = entry;
-               }
-       }
-
-       /*
-        * If we have multiple matches but no perfect-fit, just return
-        * NULL.
-        */
-       ret = (matches == 1) ? ret : NULL;
-
-       return ret;
-}
-
-static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket,
-                                                struct dma_debug_entry *ref)
-{
-       return __hash_bucket_find(bucket, ref, exact_match);
-}
-
-static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
-                                                  struct dma_debug_entry *ref,
-                                                  unsigned long *flags)
-{
-
-       unsigned int max_range = dma_get_max_seg_size(ref->dev);
-       struct dma_debug_entry *entry, index = *ref;
-       unsigned int range = 0;
-
-       while (range <= max_range) {
-               entry = __hash_bucket_find(*bucket, ref, containing_match);
-
-               if (entry)
-                       return entry;
-
-               /*
-                * Nothing found, go back a hash bucket
-                */
-               put_hash_bucket(*bucket, flags);
-               range          += (1 << HASH_FN_SHIFT);
-               index.dev_addr -= (1 << HASH_FN_SHIFT);
-               *bucket = get_hash_bucket(&index, flags);
-       }
-
-       return NULL;
-}
-
-/*
- * Add an entry to a hash bucket
- */
-static void hash_bucket_add(struct hash_bucket *bucket,
-                           struct dma_debug_entry *entry)
-{
-       list_add_tail(&entry->list, &bucket->list);
-}
-
-/*
- * Remove entry from a hash bucket list
- */
-static void hash_bucket_del(struct dma_debug_entry *entry)
-{
-       list_del(&entry->list);
-}
-
-static unsigned long long phys_addr(struct dma_debug_entry *entry)
-{
-       if (entry->type == dma_debug_resource)
-               return __pfn_to_phys(entry->pfn) + entry->offset;
-
-       return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset;
-}
-
-/*
- * Dump mapping entries for debugging purposes
- */
-void debug_dma_dump_mappings(struct device *dev)
-{
-       int idx;
-
-       for (idx = 0; idx < HASH_SIZE; idx++) {
-               struct hash_bucket *bucket = &dma_entry_hash[idx];
-               struct dma_debug_entry *entry;
-               unsigned long flags;
-
-               spin_lock_irqsave(&bucket->lock, flags);
-
-               list_for_each_entry(entry, &bucket->list, list) {
-                       if (!dev || dev == entry->dev) {
-                               dev_info(entry->dev,
-                                        "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
-                                        type2name[entry->type], idx,
-                                        phys_addr(entry), entry->pfn,
-                                        entry->dev_addr, entry->size,
-                                        dir2name[entry->direction],
-                                        maperr2str[entry->map_err_type]);
-                       }
-               }
-
-               spin_unlock_irqrestore(&bucket->lock, flags);
-       }
-}
-
-/*
- * For each mapping (initial cacheline in the case of
- * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
- * scatterlist, or the cacheline specified in dma_map_single) insert
- * into this tree using the cacheline as the key. At
- * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry.  If
- * the entry already exists at insertion time add a tag as a reference
- * count for the overlapping mappings.  For now, the overlap tracking
- * just ensures that 'unmaps' balance 'maps' before marking the
- * cacheline idle, but we should also be flagging overlaps as an API
- * violation.
- *
- * Memory usage is mostly constrained by the maximum number of available
- * dma-debug entries in that we need a free dma_debug_entry before
- * inserting into the tree.  In the case of dma_map_page and
- * dma_alloc_coherent there is only one dma_debug_entry and one
- * dma_active_cacheline entry to track per event.  dma_map_sg(), on the
- * other hand, consumes a single dma_debug_entry, but inserts 'nents'
- * entries into the tree.
- *
- * At any time debug_dma_assert_idle() can be called to trigger a
- * warning if any cachelines in the given page are in the active set.
- */
-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
-static DEFINE_SPINLOCK(radix_lock);
-#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
-#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
-#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT)
-
-static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry)
-{
-       return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) +
-               (entry->offset >> L1_CACHE_SHIFT);
-}
-
-static int active_cacheline_read_overlap(phys_addr_t cln)
-{
-       int overlap = 0, i;
-
-       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-               if (radix_tree_tag_get(&dma_active_cacheline, cln, i))
-                       overlap |= 1 << i;
-       return overlap;
-}
-
-static int active_cacheline_set_overlap(phys_addr_t cln, int overlap)
-{
-       int i;
-
-       if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0)
-               return overlap;
-
-       for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--)
-               if (overlap & 1 << i)
-                       radix_tree_tag_set(&dma_active_cacheline, cln, i);
-               else
-                       radix_tree_tag_clear(&dma_active_cacheline, cln, i);
-
-       return overlap;
-}
-
-static void active_cacheline_inc_overlap(phys_addr_t cln)
-{
-       int overlap = active_cacheline_read_overlap(cln);
-
-       overlap = active_cacheline_set_overlap(cln, ++overlap);
-
-       /* If we overflowed the overlap counter then we're potentially
-        * leaking dma-mappings.  Otherwise, if maps and unmaps are
-        * balanced then this overflow may cause false negatives in
-        * debug_dma_assert_idle() as the cacheline may be marked idle
-        * prematurely.
-        */
-       WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
-                 "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n",
-                 ACTIVE_CACHELINE_MAX_OVERLAP, &cln);
-}
-
-static int active_cacheline_dec_overlap(phys_addr_t cln)
-{
-       int overlap = active_cacheline_read_overlap(cln);
-
-       return active_cacheline_set_overlap(cln, --overlap);
-}
-
-static int active_cacheline_insert(struct dma_debug_entry *entry)
-{
-       phys_addr_t cln = to_cacheline_number(entry);
-       unsigned long flags;
-       int rc;
-
-       /* If the device is not writing memory then we don't have any
-        * concerns about the cpu consuming stale data.  This mitigates
-        * legitimate usages of overlapping mappings.
-        */
-       if (entry->direction == DMA_TO_DEVICE)
-               return 0;
-
-       spin_lock_irqsave(&radix_lock, flags);
-       rc = radix_tree_insert(&dma_active_cacheline, cln, entry);
-       if (rc == -EEXIST)
-               active_cacheline_inc_overlap(cln);
-       spin_unlock_irqrestore(&radix_lock, flags);
-
-       return rc;
-}
-
-static void active_cacheline_remove(struct dma_debug_entry *entry)
-{
-       phys_addr_t cln = to_cacheline_number(entry);
-       unsigned long flags;
-
-       /* ...mirror the insert case */
-       if (entry->direction == DMA_TO_DEVICE)
-               return;
-
-       spin_lock_irqsave(&radix_lock, flags);
-       /* since we are counting overlaps the final put of the
-        * cacheline will occur when the overlap count is 0.
-        * active_cacheline_dec_overlap() returns -1 in that case
-        */
-       if (active_cacheline_dec_overlap(cln) < 0)
-               radix_tree_delete(&dma_active_cacheline, cln);
-       spin_unlock_irqrestore(&radix_lock, flags);
-}
-
-/**
- * debug_dma_assert_idle() - assert that a page is not undergoing dma
- * @page: page to lookup in the dma_active_cacheline tree
- *
- * Place a call to this routine in cases where the cpu touching the page
- * before the dma completes (page is dma_unmapped) will lead to data
- * corruption.
- */
-void debug_dma_assert_idle(struct page *page)
-{
-       static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
-       struct dma_debug_entry *entry = NULL;
-       void **results = (void **) &ents;
-       unsigned int nents, i;
-       unsigned long flags;
-       phys_addr_t cln;
-
-       if (dma_debug_disabled())
-               return;
-
-       if (!page)
-               return;
-
-       cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
-       spin_lock_irqsave(&radix_lock, flags);
-       nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
-                                      CACHELINES_PER_PAGE);
-       for (i = 0; i < nents; i++) {
-               phys_addr_t ent_cln = to_cacheline_number(ents[i]);
-
-               if (ent_cln == cln) {
-                       entry = ents[i];
-                       break;
-               } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
-                       break;
-       }
-       spin_unlock_irqrestore(&radix_lock, flags);
-
-       if (!entry)
-               return;
-
-       cln = to_cacheline_number(entry);
-       err_printk(entry->dev, entry,
-                  "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n",
-                  &cln);
-}
-
-/*
- * Wrapper function for adding an entry to the hash.
- * This function takes care of locking itself.
- */
-static void add_dma_entry(struct dma_debug_entry *entry)
-{
-       struct hash_bucket *bucket;
-       unsigned long flags;
-       int rc;
-
-       bucket = get_hash_bucket(entry, &flags);
-       hash_bucket_add(bucket, entry);
-       put_hash_bucket(bucket, &flags);
-
-       rc = active_cacheline_insert(entry);
-       if (rc == -ENOMEM) {
-               pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n");
-               global_disable = true;
-       }
-
-       /* TODO: report -EEXIST errors here as overlapping mappings are
-        * not supported by the DMA API
-        */
-}
-
-static struct dma_debug_entry *__dma_entry_alloc(void)
-{
-       struct dma_debug_entry *entry;
-
-       entry = list_entry(free_entries.next, struct dma_debug_entry, list);
-       list_del(&entry->list);
-       memset(entry, 0, sizeof(*entry));
-
-       num_free_entries -= 1;
-       if (num_free_entries < min_free_entries)
-               min_free_entries = num_free_entries;
-
-       return entry;
-}
-
-/* struct dma_entry allocator
- *
- * The next two functions implement the allocator for
- * struct dma_debug_entries.
- */
-static struct dma_debug_entry *dma_entry_alloc(void)
-{
-       struct dma_debug_entry *entry;
-       unsigned long flags;
-
-       spin_lock_irqsave(&free_entries_lock, flags);
-
-       if (list_empty(&free_entries)) {
-               global_disable = true;
-               spin_unlock_irqrestore(&free_entries_lock, flags);
-               pr_err("DMA-API: debugging out of memory - disabling\n");
-               return NULL;
-       }
-
-       entry = __dma_entry_alloc();
-
-       spin_unlock_irqrestore(&free_entries_lock, flags);
-
-#ifdef CONFIG_STACKTRACE
-       entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES;
-       entry->stacktrace.entries = entry->st_entries;
-       entry->stacktrace.skip = 2;
-       save_stack_trace(&entry->stacktrace);
-#endif
-
-       return entry;
-}
-
-static void dma_entry_free(struct dma_debug_entry *entry)
-{
-       unsigned long flags;
-
-       active_cacheline_remove(entry);
-
-       /*
-        * add to beginning of the list - this way the entries are
-        * more likely cache hot when they are reallocated.
-        */
-       spin_lock_irqsave(&free_entries_lock, flags);
-       list_add(&entry->list, &free_entries);
-       num_free_entries += 1;
-       spin_unlock_irqrestore(&free_entries_lock, flags);
-}
-
-int dma_debug_resize_entries(u32 num_entries)
-{
-       int i, delta, ret = 0;
-       unsigned long flags;
-       struct dma_debug_entry *entry;
-       LIST_HEAD(tmp);
-
-       spin_lock_irqsave(&free_entries_lock, flags);
-
-       if (nr_total_entries < num_entries) {
-               delta = num_entries - nr_total_entries;
-
-               spin_unlock_irqrestore(&free_entries_lock, flags);
-
-               for (i = 0; i < delta; i++) {
-                       entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-                       if (!entry)
-                               break;
-
-                       list_add_tail(&entry->list, &tmp);
-               }
-
-               spin_lock_irqsave(&free_entries_lock, flags);
-
-               list_splice(&tmp, &free_entries);
-               nr_total_entries += i;
-               num_free_entries += i;
-       } else {
-               delta = nr_total_entries - num_entries;
-
-               for (i = 0; i < delta && !list_empty(&free_entries); i++) {
-                       entry = __dma_entry_alloc();
-                       kfree(entry);
-               }
-
-               nr_total_entries -= i;
-       }
-
-       if (nr_total_entries != num_entries)
-               ret = 1;
-
-       spin_unlock_irqrestore(&free_entries_lock, flags);
-
-       return ret;
-}
-
-/*
- * DMA-API debugging init code
- *
- * The init code does two things:
- *   1. Initialize core data structures
- *   2. Preallocate a given number of dma_debug_entry structs
- */
-
-static int prealloc_memory(u32 num_entries)
-{
-       struct dma_debug_entry *entry, *next_entry;
-       int i;
-
-       for (i = 0; i < num_entries; ++i) {
-               entry = kzalloc(sizeof(*entry), GFP_KERNEL);
-               if (!entry)
-                       goto out_err;
-
-               list_add_tail(&entry->list, &free_entries);
-       }
-
-       num_free_entries = num_entries;
-       min_free_entries = num_entries;
-
-       pr_info("DMA-API: preallocated %d debug entries\n", num_entries);
-
-       return 0;
-
-out_err:
-
-       list_for_each_entry_safe(entry, next_entry, &free_entries, list) {
-               list_del(&entry->list);
-               kfree(entry);
-       }
-
-       return -ENOMEM;
-}
-
-static ssize_t filter_read(struct file *file, char __user *user_buf,
-                          size_t count, loff_t *ppos)
-{
-       char buf[NAME_MAX_LEN + 1];
-       unsigned long flags;
-       int len;
-
-       if (!current_driver_name[0])
-               return 0;
-
-       /*
-        * We can't copy to userspace directly because current_driver_name can
-        * only be read under the driver_name_lock with irqs disabled. So
-        * create a temporary copy first.
-        */
-       read_lock_irqsave(&driver_name_lock, flags);
-       len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name);
-       read_unlock_irqrestore(&driver_name_lock, flags);
-
-       return simple_read_from_buffer(user_buf, count, ppos, buf, len);
-}
-
-static ssize_t filter_write(struct file *file, const char __user *userbuf,
-                           size_t count, loff_t *ppos)
-{
-       char buf[NAME_MAX_LEN];
-       unsigned long flags;
-       size_t len;
-       int i;
-
-       /*
-        * We can't copy from userspace directly. Access to
-        * current_driver_name is protected with a write_lock with irqs
-        * disabled. Since copy_from_user can fault and may sleep we
-        * need to copy to temporary buffer first
-        */
-       len = min(count, (size_t)(NAME_MAX_LEN - 1));
-       if (copy_from_user(buf, userbuf, len))
-               return -EFAULT;
-
-       buf[len] = 0;
-
-       write_lock_irqsave(&driver_name_lock, flags);
-
-       /*
-        * Now handle the string we got from userspace very carefully.
-        * The rules are:
-        *         - only use the first token we got
-        *         - token delimiter is everything looking like a space
-        *           character (' ', '\n', '\t' ...)
-        *
-        */
-       if (!isalnum(buf[0])) {
-               /*
-                * If the first character userspace gave us is not
-                * alphanumerical then assume the filter should be
-                * switched off.
-                */
-               if (current_driver_name[0])
-                       pr_info("DMA-API: switching off dma-debug driver filter\n");
-               current_driver_name[0] = 0;
-               current_driver = NULL;
-               goto out_unlock;
-       }
-
-       /*
-        * Now parse out the first token and use it as the name for the
-        * driver to filter for.
-        */
-       for (i = 0; i < NAME_MAX_LEN - 1; ++i) {
-               current_driver_name[i] = buf[i];
-               if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0)
-                       break;
-       }
-       current_driver_name[i] = 0;
-       current_driver = NULL;
-
-       pr_info("DMA-API: enable driver filter for driver [%s]\n",
-               current_driver_name);
-
-out_unlock:
-       write_unlock_irqrestore(&driver_name_lock, flags);
-
-       return count;
-}
-
-static const struct file_operations filter_fops = {
-       .read  = filter_read,
-       .write = filter_write,
-       .llseek = default_llseek,
-};
-
-static int dma_debug_fs_init(void)
-{
-       dma_debug_dent = debugfs_create_dir("dma-api", NULL);
-       if (!dma_debug_dent) {
-               pr_err("DMA-API: can not create debugfs directory\n");
-               return -ENOMEM;
-       }
-
-       global_disable_dent = debugfs_create_bool("disabled", 0444,
-                       dma_debug_dent,
-                       &global_disable);
-       if (!global_disable_dent)
-               goto out_err;
-
-       error_count_dent = debugfs_create_u32("error_count", 0444,
-                       dma_debug_dent, &error_count);
-       if (!error_count_dent)
-               goto out_err;
-
-       show_all_errors_dent = debugfs_create_u32("all_errors", 0644,
-                       dma_debug_dent,
-                       &show_all_errors);
-       if (!show_all_errors_dent)
-               goto out_err;
-
-       show_num_errors_dent = debugfs_create_u32("num_errors", 0644,
-                       dma_debug_dent,
-                       &show_num_errors);
-       if (!show_num_errors_dent)
-               goto out_err;
-
-       num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444,
-                       dma_debug_dent,
-                       &num_free_entries);
-       if (!num_free_entries_dent)
-               goto out_err;
-
-       min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444,
-                       dma_debug_dent,
-                       &min_free_entries);
-       if (!min_free_entries_dent)
-               goto out_err;
-
-       filter_dent = debugfs_create_file("driver_filter", 0644,
-                                         dma_debug_dent, NULL, &filter_fops);
-       if (!filter_dent)
-               goto out_err;
-
-       return 0;
-
-out_err:
-       debugfs_remove_recursive(dma_debug_dent);
-
-       return -ENOMEM;
-}
-
-static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
-{
-       struct dma_debug_entry *entry;
-       unsigned long flags;
-       int count = 0, i;
-
-       for (i = 0; i < HASH_SIZE; ++i) {
-               spin_lock_irqsave(&dma_entry_hash[i].lock, flags);
-               list_for_each_entry(entry, &dma_entry_hash[i].list, list) {
-                       if (entry->dev == dev) {
-                               count += 1;
-                               *out_entry = entry;
-                       }
-               }
-               spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags);
-       }
-
-       return count;
-}
-
-static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
-{
-       struct device *dev = data;
-       struct dma_debug_entry *uninitialized_var(entry);
-       int count;
-
-       if (dma_debug_disabled())
-               return 0;
-
-       switch (action) {
-       case BUS_NOTIFY_UNBOUND_DRIVER:
-               count = device_dma_allocations(dev, &entry);
-               if (count == 0)
-                       break;
-               err_printk(dev, entry, "DMA-API: device driver has pending "
-                               "DMA allocations while released from device "
-                               "[count=%d]\n"
-                               "One of leaked entries details: "
-                               "[device address=0x%016llx] [size=%llu bytes] "
-                               "[mapped with %s] [mapped as %s]\n",
-                       count, entry->dev_addr, entry->size,
-                       dir2name[entry->direction], type2name[entry->type]);
-               break;
-       default:
-               break;
-       }
-
-       return 0;
-}
-
-void dma_debug_add_bus(struct bus_type *bus)
-{
-       struct notifier_block *nb;
-
-       if (dma_debug_disabled())
-               return;
-
-       nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL);
-       if (nb == NULL) {
-               pr_err("dma_debug_add_bus: out of memory\n");
-               return;
-       }
-
-       nb->notifier_call = dma_debug_device_change;
-
-       bus_register_notifier(bus, nb);
-}
-
-static int dma_debug_init(void)
-{
-       int i;
-
-       /* Do not use dma_debug_initialized here, since we really want to be
-        * called to set dma_debug_initialized
-        */
-       if (global_disable)
-               return 0;
-
-       for (i = 0; i < HASH_SIZE; ++i) {
-               INIT_LIST_HEAD(&dma_entry_hash[i].list);
-               spin_lock_init(&dma_entry_hash[i].lock);
-       }
-
-       if (dma_debug_fs_init() != 0) {
-               pr_err("DMA-API: error creating debugfs entries - disabling\n");
-               global_disable = true;
-
-               return 0;
-       }
-
-       if (prealloc_memory(nr_prealloc_entries) != 0) {
-               pr_err("DMA-API: debugging out of memory error - disabled\n");
-               global_disable = true;
-
-               return 0;
-       }
-
-       nr_total_entries = num_free_entries;
-
-       dma_debug_initialized = true;
-
-       pr_info("DMA-API: debugging enabled by kernel config\n");
-       return 0;
-}
-core_initcall(dma_debug_init);
-
-static __init int dma_debug_cmdline(char *str)
-{
-       if (!str)
-               return -EINVAL;
-
-       if (strncmp(str, "off", 3) == 0) {
-               pr_info("DMA-API: debugging disabled on kernel command line\n");
-               global_disable = true;
-       }
-
-       return 0;
-}
-
-static __init int dma_debug_entries_cmdline(char *str)
-{
-       if (!str)
-               return -EINVAL;
-       if (!get_option(&str, &nr_prealloc_entries))
-               nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
-       return 0;
-}
-
-__setup("dma_debug=", dma_debug_cmdline);
-__setup("dma_debug_entries=", dma_debug_entries_cmdline);
-
-static void check_unmap(struct dma_debug_entry *ref)
-{
-       struct dma_debug_entry *entry;
-       struct hash_bucket *bucket;
-       unsigned long flags;
-
-       bucket = get_hash_bucket(ref, &flags);
-       entry = bucket_find_exact(bucket, ref);
-
-       if (!entry) {
-               /* must drop lock before calling dma_mapping_error */
-               put_hash_bucket(bucket, &flags);
-
-               if (dma_mapping_error(ref->dev, ref->dev_addr)) {
-                       err_printk(ref->dev, NULL,
-                                  "DMA-API: device driver tries to free an "
-                                  "invalid DMA memory address\n");
-               } else {
-                       err_printk(ref->dev, NULL,
-                                  "DMA-API: device driver tries to free DMA "
-                                  "memory it has not allocated [device "
-                                  "address=0x%016llx] [size=%llu bytes]\n",
-                                  ref->dev_addr, ref->size);
-               }
-               return;
-       }
-
-       if (ref->size != entry->size) {
-               err_printk(ref->dev, entry, "DMA-API: device driver frees "
-                          "DMA memory with different size "
-                          "[device address=0x%016llx] [map size=%llu bytes] "
-                          "[unmap size=%llu bytes]\n",
-                          ref->dev_addr, entry->size, ref->size);
-       }
-
-       if (ref->type != entry->type) {
-               err_printk(ref->dev, entry, "DMA-API: device driver frees "
-                          "DMA memory with wrong function "
-                          "[device address=0x%016llx] [size=%llu bytes] "
-                          "[mapped as %s] [unmapped as %s]\n",
-                          ref->dev_addr, ref->size,
-                          type2name[entry->type], type2name[ref->type]);
-       } else if ((entry->type == dma_debug_coherent) &&
-                  (phys_addr(ref) != phys_addr(entry))) {
-               err_printk(ref->dev, entry, "DMA-API: device driver frees "
-                          "DMA memory with different CPU address "
-                          "[device address=0x%016llx] [size=%llu bytes] "
-                          "[cpu alloc address=0x%016llx] "
-                          "[cpu free address=0x%016llx]",
-                          ref->dev_addr, ref->size,
-                          phys_addr(entry),
-                          phys_addr(ref));
-       }
-
-       if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-           ref->sg_call_ents != entry->sg_call_ents) {
-               err_printk(ref->dev, entry, "DMA-API: device driver frees "
-                          "DMA sg list with different entry count "
-                          "[map count=%d] [unmap count=%d]\n",
-                          entry->sg_call_ents, ref->sg_call_ents);
-       }
-
-       /*
-        * This may be no bug in reality - but most implementations of the
-        * DMA API don't handle this properly, so check for it here
-        */
-       if (ref->direction != entry->direction) {
-               err_printk(ref->dev, entry, "DMA-API: device driver frees "
-                          "DMA memory with different direction "
-                          "[device address=0x%016llx] [size=%llu bytes] "
-                          "[mapped with %s] [unmapped with %s]\n",
-                          ref->dev_addr, ref->size,
-                          dir2name[entry->direction],
-                          dir2name[ref->direction]);
-       }
-
-       /*
-        * Drivers should use dma_mapping_error() to check the returned
-        * addresses of dma_map_single() and dma_map_page().
-        * If not, print this warning message. See Documentation/DMA-API.txt.
-        */
-       if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-               err_printk(ref->dev, entry,
-                          "DMA-API: device driver failed to check map error"
-                          "[device address=0x%016llx] [size=%llu bytes] "
-                          "[mapped as %s]",
-                          ref->dev_addr, ref->size,
-                          type2name[entry->type]);
-       }
-
-       hash_bucket_del(entry);
-       dma_entry_free(entry);
-
-       put_hash_bucket(bucket, &flags);
-}
-
-static void check_for_stack(struct device *dev,
-                           struct page *page, size_t offset)
-{
-       void *addr;
-       struct vm_struct *stack_vm_area = task_stack_vm_area(current);
-
-       if (!stack_vm_area) {
-               /* Stack is direct-mapped. */
-               if (PageHighMem(page))
-                       return;
-               addr = page_address(page) + offset;
-               if (object_is_on_stack(addr))
-                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr);
-       } else {
-               /* Stack is vmalloced. */
-               int i;
-
-               for (i = 0; i < stack_vm_area->nr_pages; i++) {
-                       if (page != stack_vm_area->pages[i])
-                               continue;
-
-                       addr = (u8 *)current->stack + i * PAGE_SIZE + offset;
-                       err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr);
-                       break;
-               }
-       }
-}
-
-static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
-{
-       unsigned long a1 = (unsigned long)addr;
-       unsigned long b1 = a1 + len;
-       unsigned long a2 = (unsigned long)start;
-       unsigned long b2 = (unsigned long)end;
-
-       return !(b1 <= a2 || a1 >= b2);
-}
-
-static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
-{
-       if (overlap(addr, len, _stext, _etext) ||
-           overlap(addr, len, __start_rodata, __end_rodata))
-               err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
-}
-
-static void check_sync(struct device *dev,
-                      struct dma_debug_entry *ref,
-                      bool to_cpu)
-{
-       struct dma_debug_entry *entry;
-       struct hash_bucket *bucket;
-       unsigned long flags;
-
-       bucket = get_hash_bucket(ref, &flags);
-
-       entry = bucket_find_contain(&bucket, ref, &flags);
-
-       if (!entry) {
-               err_printk(dev, NULL, "DMA-API: device driver tries "
-                               "to sync DMA memory it has not allocated "
-                               "[device address=0x%016llx] [size=%llu bytes]\n",
-                               (unsigned long long)ref->dev_addr, ref->size);
-               goto out;
-       }
-
-       if (ref->size > entry->size) {
-               err_printk(dev, entry, "DMA-API: device driver syncs"
-                               " DMA memory outside allocated range "
-                               "[device address=0x%016llx] "
-                               "[allocation size=%llu bytes] "
-                               "[sync offset+size=%llu]\n",
-                               entry->dev_addr, entry->size,
-                               ref->size);
-       }
-
-       if (entry->direction == DMA_BIDIRECTIONAL)
-               goto out;
-
-       if (ref->direction != entry->direction) {
-               err_printk(dev, entry, "DMA-API: device driver syncs "
-                               "DMA memory with different direction "
-                               "[device address=0x%016llx] [size=%llu bytes] "
-                               "[mapped with %s] [synced with %s]\n",
-                               (unsigned long long)ref->dev_addr, entry->size,
-                               dir2name[entry->direction],
-                               dir2name[ref->direction]);
-       }
-
-       if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) &&
-                     !(ref->direction == DMA_TO_DEVICE))
-               err_printk(dev, entry, "DMA-API: device driver syncs "
-                               "device read-only DMA memory for cpu "
-                               "[device address=0x%016llx] [size=%llu bytes] "
-                               "[mapped with %s] [synced with %s]\n",
-                               (unsigned long long)ref->dev_addr, entry->size,
-                               dir2name[entry->direction],
-                               dir2name[ref->direction]);
-
-       if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) &&
-                      !(ref->direction == DMA_FROM_DEVICE))
-               err_printk(dev, entry, "DMA-API: device driver syncs "
-                               "device write-only DMA memory to device "
-                               "[device address=0x%016llx] [size=%llu bytes] "
-                               "[mapped with %s] [synced with %s]\n",
-                               (unsigned long long)ref->dev_addr, entry->size,
-                               dir2name[entry->direction],
-                               dir2name[ref->direction]);
-
-       if (ref->sg_call_ents && ref->type == dma_debug_sg &&
-           ref->sg_call_ents != entry->sg_call_ents) {
-               err_printk(ref->dev, entry, "DMA-API: device driver syncs "
-                          "DMA sg list with different entry count "
-                          "[map count=%d] [sync count=%d]\n",
-                          entry->sg_call_ents, ref->sg_call_ents);
-       }
-
-out:
-       put_hash_bucket(bucket, &flags);
-}
-
-static void check_sg_segment(struct device *dev, struct scatterlist *sg)
-{
-#ifdef CONFIG_DMA_API_DEBUG_SG
-       unsigned int max_seg = dma_get_max_seg_size(dev);
-       u64 start, end, boundary = dma_get_seg_boundary(dev);
-
-       /*
-        * Either the driver forgot to set dma_parms appropriately, or
-        * whoever generated the list forgot to check them.
-        */
-       if (sg->length > max_seg)
-               err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n",
-                          sg->length, max_seg);
-       /*
-        * In some cases this could potentially be the DMA API
-        * implementation's fault, but it would usually imply that
-        * the scatterlist was built inappropriately to begin with.
-        */
-       start = sg_dma_address(sg);
-       end = start + sg_dma_len(sg) - 1;
-       if ((start ^ end) & ~boundary)
-               err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n",
-                          start, end, boundary);
-#endif
-}
-
-void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
-                       size_t size, int direction, dma_addr_t dma_addr,
-                       bool map_single)
-{
-       struct dma_debug_entry *entry;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       if (dma_mapping_error(dev, dma_addr))
-               return;
-
-       entry = dma_entry_alloc();
-       if (!entry)
-               return;
-
-       entry->dev       = dev;
-       entry->type      = dma_debug_page;
-       entry->pfn       = page_to_pfn(page);
-       entry->offset    = offset,
-       entry->dev_addr  = dma_addr;
-       entry->size      = size;
-       entry->direction = direction;
-       entry->map_err_type = MAP_ERR_NOT_CHECKED;
-
-       if (map_single)
-               entry->type = dma_debug_single;
-
-       check_for_stack(dev, page, offset);
-
-       if (!PageHighMem(page)) {
-               void *addr = page_address(page) + offset;
-
-               check_for_illegal_area(dev, addr, size);
-       }
-
-       add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_page);
-
-void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-       struct dma_debug_entry ref;
-       struct dma_debug_entry *entry;
-       struct hash_bucket *bucket;
-       unsigned long flags;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       ref.dev = dev;
-       ref.dev_addr = dma_addr;
-       bucket = get_hash_bucket(&ref, &flags);
-
-       list_for_each_entry(entry, &bucket->list, list) {
-               if (!exact_match(&ref, entry))
-                       continue;
-
-               /*
-                * The same physical address can be mapped multiple
-                * times. Without a hardware IOMMU this results in the
-                * same device addresses being put into the dma-debug
-                * hash multiple times too. This can result in false
-                * positives being reported. Therefore we implement a
-                * best-fit algorithm here which updates the first entry
-                * from the hash which fits the reference value and is
-                * not currently listed as being checked.
-                */
-               if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
-                       entry->map_err_type = MAP_ERR_CHECKED;
-                       break;
-               }
-       }
-
-       put_hash_bucket(bucket, &flags);
-}
-EXPORT_SYMBOL(debug_dma_mapping_error);
-
-void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
-                         size_t size, int direction, bool map_single)
-{
-       struct dma_debug_entry ref = {
-               .type           = dma_debug_page,
-               .dev            = dev,
-               .dev_addr       = addr,
-               .size           = size,
-               .direction      = direction,
-       };
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       if (map_single)
-               ref.type = dma_debug_single;
-
-       check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_page);
-
-void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
-                     int nents, int mapped_ents, int direction)
-{
-       struct dma_debug_entry *entry;
-       struct scatterlist *s;
-       int i;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       for_each_sg(sg, s, mapped_ents, i) {
-               entry = dma_entry_alloc();
-               if (!entry)
-                       return;
-
-               entry->type           = dma_debug_sg;
-               entry->dev            = dev;
-               entry->pfn            = page_to_pfn(sg_page(s));
-               entry->offset         = s->offset,
-               entry->size           = sg_dma_len(s);
-               entry->dev_addr       = sg_dma_address(s);
-               entry->direction      = direction;
-               entry->sg_call_ents   = nents;
-               entry->sg_mapped_ents = mapped_ents;
-
-               check_for_stack(dev, sg_page(s), s->offset);
-
-               if (!PageHighMem(sg_page(s))) {
-                       check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
-               }
-
-               check_sg_segment(dev, s);
-
-               add_dma_entry(entry);
-       }
-}
-EXPORT_SYMBOL(debug_dma_map_sg);
-
-static int get_nr_mapped_entries(struct device *dev,
-                                struct dma_debug_entry *ref)
-{
-       struct dma_debug_entry *entry;
-       struct hash_bucket *bucket;
-       unsigned long flags;
-       int mapped_ents;
-
-       bucket       = get_hash_bucket(ref, &flags);
-       entry        = bucket_find_exact(bucket, ref);
-       mapped_ents  = 0;
-
-       if (entry)
-               mapped_ents = entry->sg_mapped_ents;
-       put_hash_bucket(bucket, &flags);
-
-       return mapped_ents;
-}
-
-void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
-                       int nelems, int dir)
-{
-       struct scatterlist *s;
-       int mapped_ents = 0, i;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       for_each_sg(sglist, s, nelems, i) {
-
-               struct dma_debug_entry ref = {
-                       .type           = dma_debug_sg,
-                       .dev            = dev,
-                       .pfn            = page_to_pfn(sg_page(s)),
-                       .offset         = s->offset,
-                       .dev_addr       = sg_dma_address(s),
-                       .size           = sg_dma_len(s),
-                       .direction      = dir,
-                       .sg_call_ents   = nelems,
-               };
-
-               if (mapped_ents && i >= mapped_ents)
-                       break;
-
-               if (!i)
-                       mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-               check_unmap(&ref);
-       }
-}
-EXPORT_SYMBOL(debug_dma_unmap_sg);
-
-void debug_dma_alloc_coherent(struct device *dev, size_t size,
-                             dma_addr_t dma_addr, void *virt)
-{
-       struct dma_debug_entry *entry;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       if (unlikely(virt == NULL))
-               return;
-
-       /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-               return;
-
-       entry = dma_entry_alloc();
-       if (!entry)
-               return;
-
-       entry->type      = dma_debug_coherent;
-       entry->dev       = dev;
-       entry->offset    = offset_in_page(virt);
-       entry->size      = size;
-       entry->dev_addr  = dma_addr;
-       entry->direction = DMA_BIDIRECTIONAL;
-
-       if (is_vmalloc_addr(virt))
-               entry->pfn = vmalloc_to_pfn(virt);
-       else
-               entry->pfn = page_to_pfn(virt_to_page(virt));
-
-       add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_alloc_coherent);
-
-void debug_dma_free_coherent(struct device *dev, size_t size,
-                        void *virt, dma_addr_t addr)
-{
-       struct dma_debug_entry ref = {
-               .type           = dma_debug_coherent,
-               .dev            = dev,
-               .offset         = offset_in_page(virt),
-               .dev_addr       = addr,
-               .size           = size,
-               .direction      = DMA_BIDIRECTIONAL,
-       };
-
-       /* handle vmalloc and linear addresses */
-       if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
-               return;
-
-       if (is_vmalloc_addr(virt))
-               ref.pfn = vmalloc_to_pfn(virt);
-       else
-               ref.pfn = page_to_pfn(virt_to_page(virt));
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_free_coherent);
-
-void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
-                           int direction, dma_addr_t dma_addr)
-{
-       struct dma_debug_entry *entry;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       entry = dma_entry_alloc();
-       if (!entry)
-               return;
-
-       entry->type             = dma_debug_resource;
-       entry->dev              = dev;
-       entry->pfn              = PHYS_PFN(addr);
-       entry->offset           = offset_in_page(addr);
-       entry->size             = size;
-       entry->dev_addr         = dma_addr;
-       entry->direction        = direction;
-       entry->map_err_type     = MAP_ERR_NOT_CHECKED;
-
-       add_dma_entry(entry);
-}
-EXPORT_SYMBOL(debug_dma_map_resource);
-
-void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
-                             size_t size, int direction)
-{
-       struct dma_debug_entry ref = {
-               .type           = dma_debug_resource,
-               .dev            = dev,
-               .dev_addr       = dma_addr,
-               .size           = size,
-               .direction      = direction,
-       };
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       check_unmap(&ref);
-}
-EXPORT_SYMBOL(debug_dma_unmap_resource);
-
-void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
-                                  size_t size, int direction)
-{
-       struct dma_debug_entry ref;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       ref.type         = dma_debug_single;
-       ref.dev          = dev;
-       ref.dev_addr     = dma_handle;
-       ref.size         = size;
-       ref.direction    = direction;
-       ref.sg_call_ents = 0;
-
-       check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
-
-void debug_dma_sync_single_for_device(struct device *dev,
-                                     dma_addr_t dma_handle, size_t size,
-                                     int direction)
-{
-       struct dma_debug_entry ref;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       ref.type         = dma_debug_single;
-       ref.dev          = dev;
-       ref.dev_addr     = dma_handle;
-       ref.size         = size;
-       ref.direction    = direction;
-       ref.sg_call_ents = 0;
-
-       check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_for_device);
-
-void debug_dma_sync_single_range_for_cpu(struct device *dev,
-                                        dma_addr_t dma_handle,
-                                        unsigned long offset, size_t size,
-                                        int direction)
-{
-       struct dma_debug_entry ref;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       ref.type         = dma_debug_single;
-       ref.dev          = dev;
-       ref.dev_addr     = dma_handle;
-       ref.size         = offset + size;
-       ref.direction    = direction;
-       ref.sg_call_ents = 0;
-
-       check_sync(dev, &ref, true);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu);
-
-void debug_dma_sync_single_range_for_device(struct device *dev,
-                                           dma_addr_t dma_handle,
-                                           unsigned long offset,
-                                           size_t size, int direction)
-{
-       struct dma_debug_entry ref;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       ref.type         = dma_debug_single;
-       ref.dev          = dev;
-       ref.dev_addr     = dma_handle;
-       ref.size         = offset + size;
-       ref.direction    = direction;
-       ref.sg_call_ents = 0;
-
-       check_sync(dev, &ref, false);
-}
-EXPORT_SYMBOL(debug_dma_sync_single_range_for_device);
-
-void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
-                              int nelems, int direction)
-{
-       struct scatterlist *s;
-       int mapped_ents = 0, i;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       for_each_sg(sg, s, nelems, i) {
-
-               struct dma_debug_entry ref = {
-                       .type           = dma_debug_sg,
-                       .dev            = dev,
-                       .pfn            = page_to_pfn(sg_page(s)),
-                       .offset         = s->offset,
-                       .dev_addr       = sg_dma_address(s),
-                       .size           = sg_dma_len(s),
-                       .direction      = direction,
-                       .sg_call_ents   = nelems,
-               };
-
-               if (!i)
-                       mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-               if (i >= mapped_ents)
-                       break;
-
-               check_sync(dev, &ref, true);
-       }
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
-
-void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
-                                 int nelems, int direction)
-{
-       struct scatterlist *s;
-       int mapped_ents = 0, i;
-
-       if (unlikely(dma_debug_disabled()))
-               return;
-
-       for_each_sg(sg, s, nelems, i) {
-
-               struct dma_debug_entry ref = {
-                       .type           = dma_debug_sg,
-                       .dev            = dev,
-                       .pfn            = page_to_pfn(sg_page(s)),
-                       .offset         = s->offset,
-                       .dev_addr       = sg_dma_address(s),
-                       .size           = sg_dma_len(s),
-                       .direction      = direction,
-                       .sg_call_ents   = nelems,
-               };
-               if (!i)
-                       mapped_ents = get_nr_mapped_entries(dev, &ref);
-
-               if (i >= mapped_ents)
-                       break;
-
-               check_sync(dev, &ref, false);
-       }
-}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
-
-static int __init dma_debug_driver_setup(char *str)
-{
-       int i;
-
-       for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) {
-               current_driver_name[i] = *str;
-               if (*str == 0)
-                       break;
-       }
-
-       if (current_driver_name[0])
-               pr_info("DMA-API: enable driver filter for driver [%s]\n",
-                       current_driver_name);
-
-
-       return 1;
-}
-__setup("dma_debug_driver=", dma_debug_driver_setup);
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
deleted file mode 100644 (file)
index 8be8106..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations that map physical memory directly without using an IOMMU or
- * flushing caches.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/scatterlist.h>
-#include <linux/dma-contiguous.h>
-#include <linux/pfn.h>
-#include <linux/set_memory.h>
-
-#define DIRECT_MAPPING_ERROR           0
-
-/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but
- * some use it for entirely different regions:
- */
-#ifndef ARCH_ZONE_DMA_BITS
-#define ARCH_ZONE_DMA_BITS 24
-#endif
-
-/*
- * For AMD SEV all DMA must be to unencrypted addresses.
- */
-static inline bool force_dma_unencrypted(void)
-{
-       return sev_active();
-}
-
-static bool
-check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
-               const char *caller)
-{
-       if (unlikely(dev && !dma_capable(dev, dma_addr, size))) {
-               if (!dev->dma_mask) {
-                       dev_err(dev,
-                               "%s: call on device without dma_mask\n",
-                               caller);
-                       return false;
-               }
-
-               if (*dev->dma_mask >= DMA_BIT_MASK(32)) {
-                       dev_err(dev,
-                               "%s: overflow %pad+%zu of device mask %llx\n",
-                               caller, &dma_addr, size, *dev->dma_mask);
-               }
-               return false;
-       }
-       return true;
-}
-
-static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
-{
-       dma_addr_t addr = force_dma_unencrypted() ?
-               __phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
-       return addr + size - 1 <= dev->coherent_dma_mask;
-}
-
-void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-       int page_order = get_order(size);
-       struct page *page = NULL;
-       void *ret;
-
-       /* we always manually zero the memory once we are done: */
-       gfp &= ~__GFP_ZERO;
-
-       /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
-       if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-               gfp |= GFP_DMA;
-       if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA))
-               gfp |= GFP_DMA32;
-
-again:
-       /* CMA can be used only in the context which permits sleeping */
-       if (gfpflags_allow_blocking(gfp)) {
-               page = dma_alloc_from_contiguous(dev, count, page_order, gfp);
-               if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-                       dma_release_from_contiguous(dev, page, count);
-                       page = NULL;
-               }
-       }
-       if (!page)
-               page = alloc_pages_node(dev_to_node(dev), gfp, page_order);
-
-       if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
-               __free_pages(page, page_order);
-               page = NULL;
-
-               if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
-                   dev->coherent_dma_mask < DMA_BIT_MASK(64) &&
-                   !(gfp & (GFP_DMA32 | GFP_DMA))) {
-                       gfp |= GFP_DMA32;
-                       goto again;
-               }
-
-               if (IS_ENABLED(CONFIG_ZONE_DMA) &&
-                   dev->coherent_dma_mask < DMA_BIT_MASK(32) &&
-                   !(gfp & GFP_DMA)) {
-                       gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
-                       goto again;
-               }
-       }
-
-       if (!page)
-               return NULL;
-       ret = page_address(page);
-       if (force_dma_unencrypted()) {
-               set_memory_decrypted((unsigned long)ret, 1 << page_order);
-               *dma_handle = __phys_to_dma(dev, page_to_phys(page));
-       } else {
-               *dma_handle = phys_to_dma(dev, page_to_phys(page));
-       }
-       memset(ret, 0, size);
-       return ret;
-}
-
-/*
- * NOTE: this function must never look at the dma_addr argument, because we want
- * to be able to use it as a helper for iommu implementations as well.
- */
-void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
-               dma_addr_t dma_addr, unsigned long attrs)
-{
-       unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
-       unsigned int page_order = get_order(size);
-
-       if (force_dma_unencrypted())
-               set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
-       if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-               free_pages((unsigned long)cpu_addr, page_order);
-}
-
-dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
-               unsigned long offset, size_t size, enum dma_data_direction dir,
-               unsigned long attrs)
-{
-       dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset;
-
-       if (!check_addr(dev, dma_addr, size, __func__))
-               return DIRECT_MAPPING_ERROR;
-       return dma_addr;
-}
-
-int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
-               enum dma_data_direction dir, unsigned long attrs)
-{
-       int i;
-       struct scatterlist *sg;
-
-       for_each_sg(sgl, sg, nents, i) {
-               BUG_ON(!sg_page(sg));
-
-               sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg));
-               if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__))
-                       return 0;
-               sg_dma_len(sg) = sg->length;
-       }
-
-       return nents;
-}
-
-int dma_direct_supported(struct device *dev, u64 mask)
-{
-#ifdef CONFIG_ZONE_DMA
-       if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
-               return 0;
-#else
-       /*
-        * Because 32-bit DMA masks are so common we expect every architecture
-        * to be able to satisfy them - either by not supporting more physical
-        * memory, or by providing a ZONE_DMA32.  If neither is the case, the
-        * architecture needs to use an IOMMU instead of the direct mapping.
-        */
-       if (mask < DMA_BIT_MASK(32))
-               return 0;
-#endif
-       /*
-        * Various PCI/PCIe bridges have broken support for > 32bit DMA even
-        * if the device itself might support it.
-        */
-       if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32))
-               return 0;
-       return 1;
-}
-
-int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr)
-{
-       return dma_addr == DIRECT_MAPPING_ERROR;
-}
-
-const struct dma_map_ops dma_direct_ops = {
-       .alloc                  = dma_direct_alloc,
-       .free                   = dma_direct_free,
-       .map_page               = dma_direct_map_page,
-       .map_sg                 = dma_direct_map_sg,
-       .dma_supported          = dma_direct_supported,
-       .mapping_error          = dma_direct_mapping_error,
-};
-EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c
deleted file mode 100644 (file)
index 79e9a75..0000000
+++ /dev/null
@@ -1,102 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018 Christoph Hellwig.
- *
- * DMA operations that map physical memory directly without providing cache
- * coherence.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/scatterlist.h>
-
-static void dma_noncoherent_sync_single_for_device(struct device *dev,
-               dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-       arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir);
-}
-
-static void dma_noncoherent_sync_sg_for_device(struct device *dev,
-               struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nents, i)
-               arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
-}
-
-static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page,
-               unsigned long offset, size_t size, enum dma_data_direction dir,
-               unsigned long attrs)
-{
-       dma_addr_t addr;
-
-       addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
-       if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-               arch_sync_dma_for_device(dev, page_to_phys(page) + offset,
-                               size, dir);
-       return addr;
-}
-
-static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl,
-               int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-       nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs);
-       if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-               dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir);
-       return nents;
-}
-
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-static void dma_noncoherent_sync_single_for_cpu(struct device *dev,
-               dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
-       arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir);
-}
-
-static void dma_noncoherent_sync_sg_for_cpu(struct device *dev,
-               struct scatterlist *sgl, int nents, enum dma_data_direction dir)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nents, i)
-               arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
-}
-
-static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr,
-               size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
-       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-               dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir);
-}
-
-static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl,
-               int nents, enum dma_data_direction dir, unsigned long attrs)
-{
-       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
-               dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir);
-}
-#endif
-
-const struct dma_map_ops dma_noncoherent_ops = {
-       .alloc                  = arch_dma_alloc,
-       .free                   = arch_dma_free,
-       .mmap                   = arch_dma_mmap,
-       .sync_single_for_device = dma_noncoherent_sync_single_for_device,
-       .sync_sg_for_device     = dma_noncoherent_sync_sg_for_device,
-       .map_page               = dma_noncoherent_map_page,
-       .map_sg                 = dma_noncoherent_map_sg,
-#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU
-       .sync_single_for_cpu    = dma_noncoherent_sync_single_for_cpu,
-       .sync_sg_for_cpu        = dma_noncoherent_sync_sg_for_cpu,
-       .unmap_page             = dma_noncoherent_unmap_page,
-       .unmap_sg               = dma_noncoherent_unmap_sg,
-#endif
-       .dma_supported          = dma_direct_supported,
-       .mapping_error          = dma_direct_mapping_error,
-       .cache_sync             = arch_dma_cache_sync,
-};
-EXPORT_SYMBOL(dma_noncoherent_ops);
diff --git a/lib/dma-virt.c b/lib/dma-virt.c
deleted file mode 100644 (file)
index 8e61a02..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *     lib/dma-virt.c
- *
- * DMA operations that map to virtual addresses without flushing memory.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-
-static void *dma_virt_alloc(struct device *dev, size_t size,
-                           dma_addr_t *dma_handle, gfp_t gfp,
-                           unsigned long attrs)
-{
-       void *ret;
-
-       ret = (void *)__get_free_pages(gfp, get_order(size));
-       if (ret)
-               *dma_handle = (uintptr_t)ret;
-       return ret;
-}
-
-static void dma_virt_free(struct device *dev, size_t size,
-                         void *cpu_addr, dma_addr_t dma_addr,
-                         unsigned long attrs)
-{
-       free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
-                                   unsigned long offset, size_t size,
-                                   enum dma_data_direction dir,
-                                   unsigned long attrs)
-{
-       return (uintptr_t)(page_address(page) + offset);
-}
-
-static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
-                          int nents, enum dma_data_direction dir,
-                          unsigned long attrs)
-{
-       int i;
-       struct scatterlist *sg;
-
-       for_each_sg(sgl, sg, nents, i) {
-               BUG_ON(!sg_page(sg));
-               sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
-               sg_dma_len(sg) = sg->length;
-       }
-
-       return nents;
-}
-
-const struct dma_map_ops dma_virt_ops = {
-       .alloc                  = dma_virt_alloc,
-       .free                   = dma_virt_free,
-       .map_page               = dma_virt_map_page,
-       .map_sg                 = dma_virt_map_sg,
-};
-EXPORT_SYMBOL(dma_virt_ops);
index 0eb4835..d3b81ce 100644 (file)
@@ -350,3 +350,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock)
 }
 EXPORT_SYMBOL(refcount_dec_and_lock);
 
+/**
+ * refcount_dec_and_lock_irqsave - return holding spinlock with disabled
+ *                                 interrupts if able to decrement refcount to 0
+ * @r: the refcount
+ * @lock: the spinlock to be locked
+ * @flags: saved IRQ-flags if the is acquired
+ *
+ * Same as refcount_dec_and_lock() above except that the spinlock is acquired
+ * with disabled interupts.
+ *
+ * Return: true and hold spinlock if able to decrement refcount to 0, false
+ *         otherwise
+ */
+bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock,
+                                  unsigned long *flags)
+{
+       if (refcount_dec_not_one(r))
+               return false;
+
+       spin_lock_irqsave(lock, *flags);
+       if (!refcount_dec_and_test(r)) {
+               spin_unlock_irqrestore(lock, *flags);
+               return false;
+       }
+
+       return true;
+}
+EXPORT_SYMBOL(refcount_dec_and_lock_irqsave);
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
deleted file mode 100644 (file)
index 04b68d9..0000000
+++ /dev/null
@@ -1,1087 +0,0 @@
-/*
- * Dynamic DMA mapping support.
- *
- * This implementation is a fallback for platforms that do not support
- * I/O TLBs (aka DMA address translation hardware).
- * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com>
- * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com>
- * Copyright (C) 2000, 2003 Hewlett-Packard Co
- *     David Mosberger-Tang <davidm@hpl.hp.com>
- *
- * 03/05/07 davidm     Switch from PCI-DMA to generic device DMA API.
- * 00/12/13 davidm     Rename to swiotlb.c and add mark_clean() to avoid
- *                     unnecessary i-cache flushing.
- * 04/07/.. ak         Better overflow handling. Assorted fixes.
- * 05/09/10 linville   Add support for syncing ranges, support syncing for
- *                     DMA_BIDIRECTIONAL mappings, miscellaneous cleanup.
- * 08/12/11 beckyb     Add highmem support
- */
-
-#include <linux/cache.h>
-#include <linux/dma-direct.h>
-#include <linux/mm.h>
-#include <linux/export.h>
-#include <linux/spinlock.h>
-#include <linux/string.h>
-#include <linux/swiotlb.h>
-#include <linux/pfn.h>
-#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/scatterlist.h>
-#include <linux/mem_encrypt.h>
-#include <linux/set_memory.h>
-
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/bootmem.h>
-#include <linux/iommu-helper.h>
-
-#define CREATE_TRACE_POINTS
-#include <trace/events/swiotlb.h>
-
-#define OFFSET(val,align) ((unsigned long)     \
-                          ( (val) & ( (align) - 1)))
-
-#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
-
-/*
- * Minimum IO TLB size to bother booting with.  Systems with mainly
- * 64bit capable cards will only lightly use the swiotlb.  If we can't
- * allocate a contiguous 1MB, we're probably in trouble anyway.
- */
-#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-
-enum swiotlb_force swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end.  This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
-
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
-
-/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
- */
-unsigned int max_segment;
-
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int late_alloc;
-
-static int __init
-setup_io_tlb_npages(char *str)
-{
-       if (isdigit(*str)) {
-               io_tlb_nslabs = simple_strtoul(str, &str, 0);
-               /* avoid tail segment of size < IO_TLB_SEGSIZE */
-               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-       }
-       if (*str == ',')
-               ++str;
-       if (!strcmp(str, "force")) {
-               swiotlb_force = SWIOTLB_FORCE;
-       } else if (!strcmp(str, "noforce")) {
-               swiotlb_force = SWIOTLB_NO_FORCE;
-               io_tlb_nslabs = 1;
-       }
-
-       return 0;
-}
-early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
-
-unsigned long swiotlb_nr_tbl(void)
-{
-       return io_tlb_nslabs;
-}
-EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
-
-unsigned int swiotlb_max_segment(void)
-{
-       return max_segment;
-}
-EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-
-void swiotlb_set_max_segment(unsigned int val)
-{
-       if (swiotlb_force == SWIOTLB_FORCE)
-               max_segment = 1;
-       else
-               max_segment = rounddown(val, PAGE_SIZE);
-}
-
-/* default to 64MB */
-#define IO_TLB_DEFAULT_SIZE (64UL<<20)
-unsigned long swiotlb_size_or_default(void)
-{
-       unsigned long size;
-
-       size = io_tlb_nslabs << IO_TLB_SHIFT;
-
-       return size ? size : (IO_TLB_DEFAULT_SIZE);
-}
-
-static bool no_iotlb_memory;
-
-void swiotlb_print_info(void)
-{
-       unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-       unsigned char *vstart, *vend;
-
-       if (no_iotlb_memory) {
-               pr_warn("software IO TLB: No low mem\n");
-               return;
-       }
-
-       vstart = phys_to_virt(io_tlb_start);
-       vend = phys_to_virt(io_tlb_end);
-
-       printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n",
-              (unsigned long long)io_tlb_start,
-              (unsigned long long)io_tlb_end,
-              bytes >> 20, vstart, vend - 1);
-}
-
-/*
- * Early SWIOTLB allocation may be too early to allow an architecture to
- * perform the desired operations.  This function allows the architecture to
- * call SWIOTLB when the operations are possible.  It needs to be called
- * before the SWIOTLB memory is used.
- */
-void __init swiotlb_update_mem_attributes(void)
-{
-       void *vaddr;
-       unsigned long bytes;
-
-       if (no_iotlb_memory || late_alloc)
-               return;
-
-       vaddr = phys_to_virt(io_tlb_start);
-       bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
-
-       vaddr = phys_to_virt(io_tlb_overflow_buffer);
-       bytes = PAGE_ALIGN(io_tlb_overflow);
-       set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-       memset(vaddr, 0, bytes);
-}
-
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
-{
-       void *v_overflow_buffer;
-       unsigned long i, bytes;
-
-       bytes = nslabs << IO_TLB_SHIFT;
-
-       io_tlb_nslabs = nslabs;
-       io_tlb_start = __pa(tlb);
-       io_tlb_end = io_tlb_start + bytes;
-
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = memblock_virt_alloc_low_nopanic(
-                                               PAGE_ALIGN(io_tlb_overflow),
-                                               PAGE_SIZE);
-       if (!v_overflow_buffer)
-               return -ENOMEM;
-
-       io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
-       /*
-        * Allocate and initialize the free list array.  This array is used
-        * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-        * between io_tlb_start and io_tlb_end.
-        */
-       io_tlb_list = memblock_virt_alloc(
-                               PAGE_ALIGN(io_tlb_nslabs * sizeof(int)),
-                               PAGE_SIZE);
-       io_tlb_orig_addr = memblock_virt_alloc(
-                               PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)),
-                               PAGE_SIZE);
-       for (i = 0; i < io_tlb_nslabs; i++) {
-               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-               io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-       }
-       io_tlb_index = 0;
-
-       if (verbose)
-               swiotlb_print_info();
-
-       swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-       return 0;
-}
-
-/*
- * Statically reserve bounce buffer space and initialize bounce buffer data
- * structures for the software IO TLB used to implement the DMA API.
- */
-void  __init
-swiotlb_init(int verbose)
-{
-       size_t default_size = IO_TLB_DEFAULT_SIZE;
-       unsigned char *vstart;
-       unsigned long bytes;
-
-       if (!io_tlb_nslabs) {
-               io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-       }
-
-       bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-       /* Get IO TLB memory from the low pages */
-       vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE);
-       if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
-               return;
-
-       if (io_tlb_start)
-               memblock_free_early(io_tlb_start,
-                                   PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-       pr_warn("Cannot allocate SWIOTLB buffer");
-       no_iotlb_memory = true;
-}
-
-/*
- * Systems with larger DMA zones (those that don't support ISA) can
- * initialize the swiotlb later using the slab allocator if needed.
- * This should be just like above, but with some error catching.
- */
-int
-swiotlb_late_init_with_default_size(size_t default_size)
-{
-       unsigned long bytes, req_nslabs = io_tlb_nslabs;
-       unsigned char *vstart = NULL;
-       unsigned int order;
-       int rc = 0;
-
-       if (!io_tlb_nslabs) {
-               io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
-               io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-       }
-
-       /*
-        * Get IO TLB memory from the low pages
-        */
-       order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
-       io_tlb_nslabs = SLABS_PER_PAGE << order;
-       bytes = io_tlb_nslabs << IO_TLB_SHIFT;
-
-       while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
-               vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
-                                                 order);
-               if (vstart)
-                       break;
-               order--;
-       }
-
-       if (!vstart) {
-               io_tlb_nslabs = req_nslabs;
-               return -ENOMEM;
-       }
-       if (order != get_order(bytes)) {
-               printk(KERN_WARNING "Warning: only able to allocate %ld MB "
-                      "for software IO TLB\n", (PAGE_SIZE << order) >> 20);
-               io_tlb_nslabs = SLABS_PER_PAGE << order;
-       }
-       rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
-       if (rc)
-               free_pages((unsigned long)vstart, order);
-
-       return rc;
-}
-
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
-{
-       unsigned long i, bytes;
-       unsigned char *v_overflow_buffer;
-
-       bytes = nslabs << IO_TLB_SHIFT;
-
-       io_tlb_nslabs = nslabs;
-       io_tlb_start = virt_to_phys(tlb);
-       io_tlb_end = io_tlb_start + bytes;
-
-       set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
-       memset(tlb, 0, bytes);
-
-       /*
-        * Get the overflow emergency buffer
-        */
-       v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
-                                                    get_order(io_tlb_overflow));
-       if (!v_overflow_buffer)
-               goto cleanup2;
-
-       set_memory_decrypted((unsigned long)v_overflow_buffer,
-                       io_tlb_overflow >> PAGE_SHIFT);
-       memset(v_overflow_buffer, 0, io_tlb_overflow);
-       io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
-       /*
-        * Allocate and initialize the free list array.  This array is used
-        * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
-        * between io_tlb_start and io_tlb_end.
-        */
-       io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-                                     get_order(io_tlb_nslabs * sizeof(int)));
-       if (!io_tlb_list)
-               goto cleanup3;
-
-       io_tlb_orig_addr = (phys_addr_t *)
-               __get_free_pages(GFP_KERNEL,
-                                get_order(io_tlb_nslabs *
-                                          sizeof(phys_addr_t)));
-       if (!io_tlb_orig_addr)
-               goto cleanup4;
-
-       for (i = 0; i < io_tlb_nslabs; i++) {
-               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
-               io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-       }
-       io_tlb_index = 0;
-
-       swiotlb_print_info();
-
-       late_alloc = 1;
-
-       swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
-
-       return 0;
-
-cleanup4:
-       free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-                                                        sizeof(int)));
-       io_tlb_list = NULL;
-cleanup3:
-       free_pages((unsigned long)v_overflow_buffer,
-                  get_order(io_tlb_overflow));
-       io_tlb_overflow_buffer = 0;
-cleanup2:
-       io_tlb_end = 0;
-       io_tlb_start = 0;
-       io_tlb_nslabs = 0;
-       max_segment = 0;
-       return -ENOMEM;
-}
-
-void __init swiotlb_exit(void)
-{
-       if (!io_tlb_orig_addr)
-               return;
-
-       if (late_alloc) {
-               free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
-                          get_order(io_tlb_overflow));
-               free_pages((unsigned long)io_tlb_orig_addr,
-                          get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
-               free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
-                                                                sizeof(int)));
-               free_pages((unsigned long)phys_to_virt(io_tlb_start),
-                          get_order(io_tlb_nslabs << IO_TLB_SHIFT));
-       } else {
-               memblock_free_late(io_tlb_overflow_buffer,
-                                  PAGE_ALIGN(io_tlb_overflow));
-               memblock_free_late(__pa(io_tlb_orig_addr),
-                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
-               memblock_free_late(__pa(io_tlb_list),
-                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
-               memblock_free_late(io_tlb_start,
-                                  PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
-       }
-       io_tlb_nslabs = 0;
-       max_segment = 0;
-}
-
-int is_swiotlb_buffer(phys_addr_t paddr)
-{
-       return paddr >= io_tlb_start && paddr < io_tlb_end;
-}
-
-/*
- * Bounce: copy the swiotlb buffer back to the original dma location
- */
-static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
-                          size_t size, enum dma_data_direction dir)
-{
-       unsigned long pfn = PFN_DOWN(orig_addr);
-       unsigned char *vaddr = phys_to_virt(tlb_addr);
-
-       if (PageHighMem(pfn_to_page(pfn))) {
-               /* The buffer does not have a mapping.  Map it in and copy */
-               unsigned int offset = orig_addr & ~PAGE_MASK;
-               char *buffer;
-               unsigned int sz = 0;
-               unsigned long flags;
-
-               while (size) {
-                       sz = min_t(size_t, PAGE_SIZE - offset, size);
-
-                       local_irq_save(flags);
-                       buffer = kmap_atomic(pfn_to_page(pfn));
-                       if (dir == DMA_TO_DEVICE)
-                               memcpy(vaddr, buffer + offset, sz);
-                       else
-                               memcpy(buffer + offset, vaddr, sz);
-                       kunmap_atomic(buffer);
-                       local_irq_restore(flags);
-
-                       size -= sz;
-                       pfn++;
-                       vaddr += sz;
-                       offset = 0;
-               }
-       } else if (dir == DMA_TO_DEVICE) {
-               memcpy(vaddr, phys_to_virt(orig_addr), size);
-       } else {
-               memcpy(phys_to_virt(orig_addr), vaddr, size);
-       }
-}
-
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
-                                  dma_addr_t tbl_dma_addr,
-                                  phys_addr_t orig_addr, size_t size,
-                                  enum dma_data_direction dir,
-                                  unsigned long attrs)
-{
-       unsigned long flags;
-       phys_addr_t tlb_addr;
-       unsigned int nslots, stride, index, wrap;
-       int i;
-       unsigned long mask;
-       unsigned long offset_slots;
-       unsigned long max_slots;
-
-       if (no_iotlb_memory)
-               panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-       if (mem_encrypt_active())
-               pr_warn_once("%s is active and system is using DMA bounce buffers\n",
-                            sme_active() ? "SME" : "SEV");
-
-       mask = dma_get_seg_boundary(hwdev);
-
-       tbl_dma_addr &= mask;
-
-       offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-
-       /*
-        * Carefully handle integer overflow which can occur when mask == ~0UL.
-        */
-       max_slots = mask + 1
-                   ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-                   : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
-
-       /*
-        * For mappings greater than or equal to a page, we limit the stride
-        * (and hence alignment) to a page size.
-        */
-       nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-       if (size >= PAGE_SIZE)
-               stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-       else
-               stride = 1;
-
-       BUG_ON(!nslots);
-
-       /*
-        * Find suitable number of IO TLB entries size that will fit this
-        * request and allocate a buffer from that IO TLB pool.
-        */
-       spin_lock_irqsave(&io_tlb_lock, flags);
-       index = ALIGN(io_tlb_index, stride);
-       if (index >= io_tlb_nslabs)
-               index = 0;
-       wrap = index;
-
-       do {
-               while (iommu_is_span_boundary(index, nslots, offset_slots,
-                                             max_slots)) {
-                       index += stride;
-                       if (index >= io_tlb_nslabs)
-                               index = 0;
-                       if (index == wrap)
-                               goto not_found;
-               }
-
-               /*
-                * If we find a slot that indicates we have 'nslots' number of
-                * contiguous buffers, we allocate the buffers from that slot
-                * and mark the entries as '0' indicating unavailable.
-                */
-               if (io_tlb_list[index] >= nslots) {
-                       int count = 0;
-
-                       for (i = index; i < (int) (index + nslots); i++)
-                               io_tlb_list[i] = 0;
-                       for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-                               io_tlb_list[i] = ++count;
-                       tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-                       /*
-                        * Update the indices to avoid searching in the next
-                        * round.
-                        */
-                       io_tlb_index = ((index + nslots) < io_tlb_nslabs
-                                       ? (index + nslots) : 0);
-
-                       goto found;
-               }
-               index += stride;
-               if (index >= io_tlb_nslabs)
-                       index = 0;
-       } while (index != wrap);
-
-not_found:
-       spin_unlock_irqrestore(&io_tlb_lock, flags);
-       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-               dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size);
-       return SWIOTLB_MAP_ERROR;
-found:
-       spin_unlock_irqrestore(&io_tlb_lock, flags);
-
-       /*
-        * Save away the mapping from the original address to the DMA address.
-        * This is needed when we sync the memory.  Then we sync the buffer if
-        * needed.
-        */
-       for (i = 0; i < nslots; i++)
-               io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
-       if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-           (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-               swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE);
-
-       return tlb_addr;
-}
-
-/*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
-          enum dma_data_direction dir, unsigned long attrs)
-{
-       dma_addr_t start_dma_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE) {
-               dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
-                                    &phys);
-               return SWIOTLB_MAP_ERROR;
-       }
-
-       start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
-       return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
-                                     dir, attrs);
-}
-
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
- */
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
-                             size_t size, enum dma_data_direction dir,
-                             unsigned long attrs)
-{
-       unsigned long flags;
-       int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-       int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-       phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-       /*
-        * First, sync the memory before unmapping the entry
-        */
-       if (orig_addr != INVALID_PHYS_ADDR &&
-           !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
-           ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
-               swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE);
-
-       /*
-        * Return the buffer to the free list by setting the corresponding
-        * entries to indicate the number of contiguous entries available.
-        * While returning the entries to the free list, we merge the entries
-        * with slots below and above the pool being returned.
-        */
-       spin_lock_irqsave(&io_tlb_lock, flags);
-       {
-               count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-                        io_tlb_list[index + nslots] : 0);
-               /*
-                * Step 1: return the slots to the free list, merging the
-                * slots with superceeding slots
-                */
-               for (i = index + nslots - 1; i >= index; i--) {
-                       io_tlb_list[i] = ++count;
-                       io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-               }
-               /*
-                * Step 2: merge the returned slots with the preceding slots,
-                * if available (non zero)
-                */
-               for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-                       io_tlb_list[i] = ++count;
-       }
-       spin_unlock_irqrestore(&io_tlb_lock, flags);
-}
-
-void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
-                            size_t size, enum dma_data_direction dir,
-                            enum dma_sync_target target)
-{
-       int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
-       phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
-       if (orig_addr == INVALID_PHYS_ADDR)
-               return;
-       orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
-
-       switch (target) {
-       case SYNC_FOR_CPU:
-               if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
-                       swiotlb_bounce(orig_addr, tlb_addr,
-                                      size, DMA_FROM_DEVICE);
-               else
-                       BUG_ON(dir != DMA_TO_DEVICE);
-               break;
-       case SYNC_FOR_DEVICE:
-               if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
-                       swiotlb_bounce(orig_addr, tlb_addr,
-                                      size, DMA_TO_DEVICE);
-               else
-                       BUG_ON(dir != DMA_FROM_DEVICE);
-               break;
-       default:
-               BUG();
-       }
-}
-
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
-               size_t size)
-{
-       u64 mask = DMA_BIT_MASK(32);
-
-       if (dev && dev->coherent_dma_mask)
-               mask = dev->coherent_dma_mask;
-       return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               unsigned long attrs)
-{
-       phys_addr_t phys_addr;
-
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               goto out_warn;
-
-       phys_addr = swiotlb_tbl_map_single(dev,
-                       __phys_to_dma(dev, io_tlb_start),
-                       0, size, DMA_FROM_DEVICE, attrs);
-       if (phys_addr == SWIOTLB_MAP_ERROR)
-               goto out_warn;
-
-       *dma_handle = __phys_to_dma(dev, phys_addr);
-       if (!dma_coherent_ok(dev, *dma_handle, size))
-               goto out_unmap;
-
-       memset(phys_to_virt(phys_addr), 0, size);
-       return phys_to_virt(phys_addr);
-
-out_unmap:
-       dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
-               (unsigned long long)dev->coherent_dma_mask,
-               (unsigned long long)*dma_handle);
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                       DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
-       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
-               dev_warn(dev,
-                       "swiotlb: coherent allocation failed, size=%zu\n",
-                       size);
-               dump_stack();
-       }
-       return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
-               dma_addr_t dma_addr)
-{
-       phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
-
-       WARN_ON_ONCE(irqs_disabled());
-
-       if (!is_swiotlb_buffer(phys_addr))
-               return false;
-
-       /*
-        * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
-        * DMA_ATTR_SKIP_CPU_SYNC is optional.
-        */
-       swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
-                                DMA_ATTR_SKIP_CPU_SYNC);
-       return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
-            int do_panic)
-{
-       if (swiotlb_force == SWIOTLB_NO_FORCE)
-               return;
-
-       /*
-        * Ran out of IOMMU space for this operation. This is very bad.
-        * Unfortunately the drivers cannot handle this operation properly.
-        * unless they check for dma_mapping_error (most don't)
-        * When the mapping is small enough return a static buffer to limit
-        * the damage, or panic when the transfer is too big.
-        */
-       dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
-                           size);
-
-       if (size <= io_tlb_overflow || !do_panic)
-               return;
-
-       if (dir == DMA_BIDIRECTIONAL)
-               panic("DMA: Random memory could be DMA accessed\n");
-       if (dir == DMA_FROM_DEVICE)
-               panic("DMA: Random memory could be DMA written\n");
-       if (dir == DMA_TO_DEVICE)
-               panic("DMA: Random memory could be DMA read\n");
-}
-
-/*
- * Map a single buffer of the indicated size for DMA in streaming mode.  The
- * physical address to use is returned.
- *
- * Once the device is given the dma address, the device owns this memory until
- * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed.
- */
-dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
-                           unsigned long offset, size_t size,
-                           enum dma_data_direction dir,
-                           unsigned long attrs)
-{
-       phys_addr_t map, phys = page_to_phys(page) + offset;
-       dma_addr_t dev_addr = phys_to_dma(dev, phys);
-
-       BUG_ON(dir == DMA_NONE);
-       /*
-        * If the address happens to be in the device's DMA window,
-        * we can safely return the device addr and not worry about bounce
-        * buffering it.
-        */
-       if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
-               return dev_addr;
-
-       trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
-       /* Oh well, have to allocate and map a bounce buffer. */
-       map = map_single(dev, phys, size, dir, attrs);
-       if (map == SWIOTLB_MAP_ERROR) {
-               swiotlb_full(dev, size, dir, 1);
-               return __phys_to_dma(dev, io_tlb_overflow_buffer);
-       }
-
-       dev_addr = __phys_to_dma(dev, map);
-
-       /* Ensure that the address returned is DMA'ble */
-       if (dma_capable(dev, dev_addr, size))
-               return dev_addr;
-
-       attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-       swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
-       return __phys_to_dma(dev, io_tlb_overflow_buffer);
-}
-
-/*
- * Unmap a single streaming mode DMA translation.  The dma_addr and size must
- * match what was provided for in a previous swiotlb_map_page call.  All
- * other usages are undefined.
- *
- * After this call, reads by the cpu to the buffer are guaranteed to see
- * whatever the device wrote there.
- */
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
-                        size_t size, enum dma_data_direction dir,
-                        unsigned long attrs)
-{
-       phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-       BUG_ON(dir == DMA_NONE);
-
-       if (is_swiotlb_buffer(paddr)) {
-               swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
-               return;
-       }
-
-       if (dir != DMA_FROM_DEVICE)
-               return;
-
-       /*
-        * phys_to_virt doesn't work with hihgmem page but we could
-        * call dma_mark_clean() with hihgmem page here. However, we
-        * are fine since dma_mark_clean() is null on POWERPC. We can
-        * make dma_mark_clean() take a physical address if necessary.
-        */
-       dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
-                       size_t size, enum dma_data_direction dir,
-                       unsigned long attrs)
-{
-       unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
-/*
- * Make physical memory consistent for a single streaming mode DMA translation
- * after a transfer.
- *
- * If you perform a swiotlb_map_page() but wish to interrogate the buffer
- * using the cpu, yet do not wish to teardown the dma mapping, you must
- * call this function before doing so.  At the next point you give the dma
- * address back to the card, you must first perform a
- * swiotlb_dma_sync_for_device, and then the device again owns the buffer
- */
-static void
-swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr,
-                   size_t size, enum dma_data_direction dir,
-                   enum dma_sync_target target)
-{
-       phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
-
-       BUG_ON(dir == DMA_NONE);
-
-       if (is_swiotlb_buffer(paddr)) {
-               swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
-               return;
-       }
-
-       if (dir != DMA_FROM_DEVICE)
-               return;
-
-       dma_mark_clean(phys_to_virt(paddr), size);
-}
-
-void
-swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr,
-                           size_t size, enum dma_data_direction dir)
-{
-       swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr,
-                              size_t size, enum dma_data_direction dir)
-{
-       swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE);
-}
-
-/*
- * Map a set of buffers described by scatterlist in streaming mode for DMA.
- * This is the scatter-gather version of the above swiotlb_map_page
- * interface.  Here the scatter gather list elements are each tagged with the
- * appropriate dma address and length.  They are obtained via
- * sg_dma_{address,length}(SG).
- *
- * NOTE: An implementation may be able to use a smaller number of
- *       DMA address/length pairs than there are SG table elements.
- *       (for example via virtual mapping capabilities)
- *       The routine returns the number of addr/length pairs actually
- *       used, at most nents.
- *
- * Device ownership issues as mentioned above for swiotlb_map_page are the
- * same here.
- */
-int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
-                    enum dma_data_direction dir, unsigned long attrs)
-{
-       struct scatterlist *sg;
-       int i;
-
-       BUG_ON(dir == DMA_NONE);
-
-       for_each_sg(sgl, sg, nelems, i) {
-               phys_addr_t paddr = sg_phys(sg);
-               dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
-               if (swiotlb_force == SWIOTLB_FORCE ||
-                   !dma_capable(hwdev, dev_addr, sg->length)) {
-                       phys_addr_t map = map_single(hwdev, sg_phys(sg),
-                                                    sg->length, dir, attrs);
-                       if (map == SWIOTLB_MAP_ERROR) {
-                               /* Don't panic here, we expect map_sg users
-                                  to do proper error handling. */
-                               swiotlb_full(hwdev, sg->length, dir, 0);
-                               attrs |= DMA_ATTR_SKIP_CPU_SYNC;
-                               swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
-                                                      attrs);
-                               sg_dma_len(sgl) = 0;
-                               return 0;
-                       }
-                       sg->dma_address = __phys_to_dma(hwdev, map);
-               } else
-                       sg->dma_address = dev_addr;
-               sg_dma_len(sg) = sg->length;
-       }
-       return nelems;
-}
-
-/*
- * Unmap a set of streaming mode DMA translations.  Again, cpu read rules
- * concerning calls here are the same as for swiotlb_unmap_page() above.
- */
-void
-swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl,
-                      int nelems, enum dma_data_direction dir,
-                      unsigned long attrs)
-{
-       struct scatterlist *sg;
-       int i;
-
-       BUG_ON(dir == DMA_NONE);
-
-       for_each_sg(sgl, sg, nelems, i)
-               unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
-                            attrs);
-}
-
-/*
- * Make physical memory consistent for a set of streaming mode DMA translations
- * after a transfer.
- *
- * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules
- * and usage.
- */
-static void
-swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl,
-               int nelems, enum dma_data_direction dir,
-               enum dma_sync_target target)
-{
-       struct scatterlist *sg;
-       int i;
-
-       for_each_sg(sgl, sg, nelems, i)
-               swiotlb_sync_single(hwdev, sg->dma_address,
-                                   sg_dma_len(sg), dir, target);
-}
-
-void
-swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg,
-                       int nelems, enum dma_data_direction dir)
-{
-       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU);
-}
-
-void
-swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
-                          int nelems, enum dma_data_direction dir)
-{
-       swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
-}
-
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
-       return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
-/*
- * Return whether the given device DMA address mask can be supported
- * properly.  For example, if your device can only drive the low 24-bits
- * during bus mastering, then you would pass 0x00ffffff as the mask to
- * this function.
- */
-int
-swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
-       return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
-}
-
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
-               gfp_t gfp, unsigned long attrs)
-{
-       void *vaddr;
-
-       /* temporary workaround: */
-       if (gfp & __GFP_NOWARN)
-               attrs |= DMA_ATTR_NO_WARN;
-
-       /*
-        * Don't print a warning when the first allocation attempt fails.
-        * swiotlb_alloc_coherent() will print a warning when the DMA memory
-        * allocation ultimately failed.
-        */
-       gfp |= __GFP_NOWARN;
-
-       vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
-       if (!vaddr)
-               vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
-       return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_addr, unsigned long attrs)
-{
-       if (!swiotlb_free_buffer(dev, size, dma_addr))
-               dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
-const struct dma_map_ops swiotlb_dma_ops = {
-       .mapping_error          = swiotlb_dma_mapping_error,
-       .alloc                  = swiotlb_alloc,
-       .free                   = swiotlb_free,
-       .sync_single_for_cpu    = swiotlb_sync_single_for_cpu,
-       .sync_single_for_device = swiotlb_sync_single_for_device,
-       .sync_sg_for_cpu        = swiotlb_sync_sg_for_cpu,
-       .sync_sg_for_device     = swiotlb_sync_sg_for_device,
-       .map_sg                 = swiotlb_map_sg_attrs,
-       .unmap_sg               = swiotlb_unmap_sg_attrs,
-       .map_page               = swiotlb_map_page,
-       .unmap_page             = swiotlb_unmap_page,
-       .dma_supported          = dma_direct_supported,
-};
index 347cc83..2e5d3df 100644 (file)
@@ -359,15 +359,8 @@ static void wb_shutdown(struct bdi_writeback *wb)
        spin_lock_bh(&wb->work_lock);
        if (!test_and_clear_bit(WB_registered, &wb->state)) {
                spin_unlock_bh(&wb->work_lock);
-               /*
-                * Wait for wb shutdown to finish if someone else is just
-                * running wb_shutdown(). Otherwise we could proceed to wb /
-                * bdi destruction before wb_shutdown() is finished.
-                */
-               wait_on_bit(&wb->state, WB_shutting_down, TASK_UNINTERRUPTIBLE);
                return;
        }
-       set_bit(WB_shutting_down, &wb->state);
        spin_unlock_bh(&wb->work_lock);
 
        cgwb_remove_from_bdi_list(wb);
@@ -379,12 +372,6 @@ static void wb_shutdown(struct bdi_writeback *wb)
        mod_delayed_work(bdi_wq, &wb->dwork, 0);
        flush_delayed_work(&wb->dwork);
        WARN_ON(!list_empty(&wb->work_list));
-       /*
-        * Make sure bit gets cleared after shutdown is finished. Matches with
-        * the barrier provided by test_and_clear_bit() above.
-        */
-       smp_wmb();
-       clear_and_wake_up_bit(WB_shutting_down, &wb->state);
 }
 
 static void wb_exit(struct bdi_writeback *wb)
@@ -508,10 +495,12 @@ static void cgwb_release_workfn(struct work_struct *work)
        struct bdi_writeback *wb = container_of(work, struct bdi_writeback,
                                                release_work);
 
+       mutex_lock(&wb->bdi->cgwb_release_mutex);
        wb_shutdown(wb);
 
        css_put(wb->memcg_css);
        css_put(wb->blkcg_css);
+       mutex_unlock(&wb->bdi->cgwb_release_mutex);
 
        fprop_local_destroy_percpu(&wb->memcg_completions);
        percpu_ref_exit(&wb->refcnt);
@@ -697,6 +686,7 @@ static int cgwb_bdi_init(struct backing_dev_info *bdi)
 
        INIT_RADIX_TREE(&bdi->cgwb_tree, GFP_ATOMIC);
        bdi->cgwb_congested_tree = RB_ROOT;
+       mutex_init(&bdi->cgwb_release_mutex);
 
        ret = wb_init(&bdi->wb, bdi, 1, GFP_KERNEL);
        if (!ret) {
@@ -717,7 +707,10 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
        spin_lock_irq(&cgwb_lock);
        radix_tree_for_each_slot(slot, &bdi->cgwb_tree, &iter, 0)
                cgwb_kill(*slot);
+       spin_unlock_irq(&cgwb_lock);
 
+       mutex_lock(&bdi->cgwb_release_mutex);
+       spin_lock_irq(&cgwb_lock);
        while (!list_empty(&bdi->wb_list)) {
                wb = list_first_entry(&bdi->wb_list, struct bdi_writeback,
                                      bdi_node);
@@ -726,6 +719,7 @@ static void cgwb_bdi_unregister(struct backing_dev_info *bdi)
                spin_lock_irq(&cgwb_lock);
        }
        spin_unlock_irq(&cgwb_lock);
+       mutex_unlock(&bdi->cgwb_release_mutex);
 }
 
 /**
index cc16d70..03d48d8 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/kmemleak.h>
 #include <linux/seq_file.h>
 #include <linux/memblock.h>
-#include <linux/bootmem.h>
 
 #include <asm/sections.h>
 #include <linux/io.h>
index 36b3ada..10462de 100644 (file)
@@ -252,8 +252,7 @@ static int br2684_xmit_vcc(struct sk_buff *skb, struct net_device *dev,
 
        ATM_SKB(skb)->vcc = atmvcc = brvcc->atmvcc;
        pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, atmvcc, atmvcc->dev);
-       refcount_add(skb->truesize, &sk_atm(atmvcc)->sk_wmem_alloc);
-       ATM_SKB(skb)->atm_options = atmvcc->atm_options;
+       atm_account_tx(atmvcc, skb);
        dev->stats.tx_packets++;
        dev->stats.tx_bytes += skb->len;
 
index 66caa48..d795b9c 100644 (file)
@@ -381,8 +381,7 @@ static netdev_tx_t clip_start_xmit(struct sk_buff *skb,
                memcpy(here, llc_oui, sizeof(llc_oui));
                ((__be16 *) here)[3] = skb->protocol;
        }
-       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
-       ATM_SKB(skb)->atm_options = vcc->atm_options;
+       atm_account_tx(vcc, skb);
        entry->vccs->last_use = jiffies;
        pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n", skb, vcc, vcc->dev);
        old = xchg(&entry->vccs->xoff, 1);      /* assume XOFF ... */
index 1f2af59..ff5748b 100644 (file)
@@ -630,10 +630,9 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size)
                goto out;
        }
        pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize);
-       refcount_add(skb->truesize, &sk->sk_wmem_alloc);
+       atm_account_tx(vcc, skb);
 
        skb->dev = NULL; /* for paths shared with net_device interfaces */
-       ATM_SKB(skb)->atm_options = vcc->atm_options;
        if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) {
                kfree_skb(skb);
                error = -EFAULT;
index 5a95fcf..d7f5cf5 100644 (file)
@@ -182,9 +182,8 @@ lec_send(struct atm_vcc *vcc, struct sk_buff *skb)
        struct net_device *dev = skb->dev;
 
        ATM_SKB(skb)->vcc = vcc;
-       ATM_SKB(skb)->atm_options = vcc->atm_options;
+       atm_account_tx(vcc, skb);
 
-       refcount_add(skb->truesize, &sk_atm(vcc)->sk_wmem_alloc);
        if (vcc->send(vcc, skb) < 0) {
                dev->stats.tx_dropped++;
                return;
index 75620c2..24b53c4 100644 (file)
@@ -555,8 +555,7 @@ static int send_via_shortcut(struct sk_buff *skb, struct mpoa_client *mpc)
                                        sizeof(struct llc_snap_hdr));
        }
 
-       refcount_add(skb->truesize, &sk_atm(entry->shortcut)->sk_wmem_alloc);
-       ATM_SKB(skb)->atm_options = entry->shortcut->atm_options;
+       atm_account_tx(entry->shortcut, skb);
        entry->shortcut->send(entry->shortcut, skb);
        entry->packets_fwded++;
        mpc->in_ops->put(entry);
index 21d9d34..af8c4b3 100644 (file)
@@ -350,8 +350,7 @@ static int pppoatm_send(struct ppp_channel *chan, struct sk_buff *skb)
                return 1;
        }
 
-       refcount_add(skb->truesize, &sk_atm(ATM_SKB(skb)->vcc)->sk_wmem_alloc);
-       ATM_SKB(skb)->atm_options = ATM_SKB(skb)->vcc->atm_options;
+       atm_account_tx(vcc, skb);
        pr_debug("atm_skb(%p)->vcc(%p)->dev(%p)\n",
                 skb, ATM_SKB(skb)->vcc, ATM_SKB(skb)->vcc->dev);
        ret = ATM_SKB(skb)->vcc->send(ATM_SKB(skb)->vcc, skb)
index ee10e8d..b3ba44a 100644 (file)
@@ -35,8 +35,8 @@ static void atm_pop_raw(struct atm_vcc *vcc, struct sk_buff *skb)
        struct sock *sk = sk_atm(vcc);
 
        pr_debug("(%d) %d -= %d\n",
-                vcc->vci, sk_wmem_alloc_get(sk), skb->truesize);
-       WARN_ON(refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc));
+                vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize);
+       WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc));
        dev_kfree_skb_any(skb);
        sk->sk_write_space(sk);
 }
diff --git a/net/bpfilter/.gitignore b/net/bpfilter/.gitignore
new file mode 100644 (file)
index 0000000..e97084e
--- /dev/null
@@ -0,0 +1 @@
+bpfilter_umh
index e0bbe75..dd86b02 100644 (file)
@@ -21,8 +21,10 @@ endif
 # which bpfilter_kern.c passes further into umh blob loader at run-time
 quiet_cmd_copy_umh = GEN $@
       cmd_copy_umh = echo ':' > $(obj)/.bpfilter_umh.o.cmd; \
-      $(OBJCOPY) -I binary -O `$(OBJDUMP) -f $<|grep format|cut -d' ' -f8` \
-      -B `$(OBJDUMP) -f $<|grep architecture|cut -d, -f1|cut -d' ' -f2` \
+      $(OBJCOPY) -I binary \
+          `LC_ALL=C objdump -f net/bpfilter/bpfilter_umh \
+          |awk -F' |,' '/file format/{print "-O",$$NF} \
+          /^architecture:/{print "-B",$$2}'` \
       --rename-section .data=.init.rodata $< $@
 
 $(obj)/bpfilter_umh.o: $(obj)/bpfilter_umh
index 57b7bab..a5aa1c7 100644 (file)
@@ -8643,7 +8643,8 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
                /* We get here if we can't use the current device name */
                if (!pat)
                        goto out;
-               if (dev_get_valid_name(net, dev, pat) < 0)
+               err = dev_get_valid_name(net, dev, pat);
+               if (err < 0)
                        goto out;
        }
 
@@ -8655,7 +8656,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
        dev_close(dev);
 
        /* And unlink it from device chain */
-       err = -ENODEV;
        unlist_netdevice(dev);
 
        synchronize_net();
index 3d9ba7e..e7f12e9 100644 (file)
@@ -3214,20 +3214,6 @@ err:
 }
 EXPORT_SYMBOL_GPL(xdp_do_redirect);
 
-static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd)
-{
-       unsigned int len;
-
-       if (unlikely(!(fwd->flags & IFF_UP)))
-               return -ENETDOWN;
-
-       len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
-       if (skb->len > len)
-               return -EMSGSIZE;
-
-       return 0;
-}
-
 static int xdp_do_generic_redirect_map(struct net_device *dev,
                                       struct sk_buff *skb,
                                       struct xdp_buff *xdp,
@@ -3256,10 +3242,11 @@ static int xdp_do_generic_redirect_map(struct net_device *dev,
        }
 
        if (map->map_type == BPF_MAP_TYPE_DEVMAP) {
-               if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd))))
+               struct bpf_dtab_netdev *dst = fwd;
+
+               err = dev_map_generic_redirect(dst, skb, xdp_prog);
+               if (unlikely(err))
                        goto err;
-               skb->dev = fwd;
-               generic_xdp_tx(skb, xdp_prog);
        } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
                struct xdp_sock *xs = fwd;
 
index 31ff46d..3647167 100644 (file)
@@ -243,9 +243,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
                        bool dev_match = (sk->sk_bound_dev_if == dif ||
                                          sk->sk_bound_dev_if == sdif);
 
-                       if (exact_dif && !dev_match)
+                       if (!dev_match)
                                return -1;
-                       if (sk->sk_bound_dev_if && dev_match)
+                       if (sk->sk_bound_dev_if)
                                score += 4;
                }
                if (sk->sk_incoming_cpu == raw_smp_processor_id())
index af5a830..b3308e9 100644 (file)
@@ -1145,7 +1145,8 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
        cork->fragsize = ip_sk_use_pmtu(sk) ?
                         dst_mtu(&rt->dst) : rt->dst.dev->mtu;
 
-       cork->gso_size = sk->sk_type == SOCK_DGRAM ? ipc->gso_size : 0;
+       cork->gso_size = sk->sk_type == SOCK_DGRAM &&
+                        sk->sk_protocol == IPPROTO_UDP ? ipc->gso_size : 0;
        cork->dst = &rt->dst;
        cork->length = 0;
        cork->ttl = ipc->ttl;
index 2febe26..595ad40 100644 (file)
@@ -113,9 +113,9 @@ static inline int compute_score(struct sock *sk, struct net *net,
                        bool dev_match = (sk->sk_bound_dev_if == dif ||
                                          sk->sk_bound_dev_if == sdif);
 
-                       if (exact_dif && !dev_match)
+                       if (!dev_match)
                                return -1;
-                       if (sk->sk_bound_dev_if && dev_match)
+                       if (sk->sk_bound_dev_if)
                                score++;
                }
                if (sk->sk_incoming_cpu == raw_smp_processor_id())
index 39d1d48..1fb2f31 100644 (file)
@@ -167,8 +167,9 @@ struct fib6_info *fib6_info_alloc(gfp_t gfp_flags)
        return f6i;
 }
 
-void fib6_info_destroy(struct fib6_info *f6i)
+void fib6_info_destroy_rcu(struct rcu_head *head)
 {
+       struct fib6_info *f6i = container_of(head, struct fib6_info, rcu);
        struct rt6_exception_bucket *bucket;
        struct dst_metrics *m;
 
@@ -206,7 +207,7 @@ void fib6_info_destroy(struct fib6_info *f6i)
 
        kfree(f6i);
 }
-EXPORT_SYMBOL_GPL(fib6_info_destroy);
+EXPORT_SYMBOL_GPL(fib6_info_destroy_rcu);
 
 static struct fib6_node *node_alloc(struct net *net)
 {
index 021e5ae..a14fb4f 100644 (file)
@@ -1219,7 +1219,8 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
-       cork->base.gso_size = sk->sk_type == SOCK_DGRAM ? ipc6->gso_size : 0;
+       cork->base.gso_size = sk->sk_type == SOCK_DGRAM &&
+                             sk->sk_protocol == IPPROTO_UDP ? ipc6->gso_size : 0;
 
        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
index e7b05de..25e483e 100644 (file)
@@ -73,8 +73,8 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp,
        ncm->data[2] = data;
        ncm->data[4] = ntohl(lsc->oem_status);
 
-       netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
-                   nc->id, data & 0x1 ? "up" : "down");
+       netdev_dbg(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n",
+                  nc->id, data & 0x1 ? "up" : "down");
 
        chained = !list_empty(&nc->link);
        state = nc->state;
@@ -148,9 +148,9 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp,
        hncdsc = (struct ncsi_aen_hncdsc_pkt *)h;
        ncm->data[3] = ntohl(hncdsc->status);
        spin_unlock_irqrestore(&nc->lock, flags);
-       netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-                     "NCSI: host driver %srunning on channel %u\n",
-                     ncm->data[3] & 0x1 ? "" : "not ", nc->id);
+       netdev_dbg(ndp->ndev.dev,
+                  "NCSI: host driver %srunning on channel %u\n",
+                  ncm->data[3] & 0x1 ? "" : "not ", nc->id);
 
        return 0;
 }
index 5561e22..0912847 100644 (file)
@@ -788,8 +788,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
                }
                break;
        case ncsi_dev_state_config_done:
-               netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-                             "NCSI: channel %u config done\n", nc->id);
+               netdev_dbg(ndp->ndev.dev, "NCSI: channel %u config done\n",
+                          nc->id);
                spin_lock_irqsave(&nc->lock, flags);
                if (nc->reconfigure_needed) {
                        /* This channel's configuration has been updated
@@ -804,8 +804,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
                        list_add_tail_rcu(&nc->link, &ndp->channel_queue);
                        spin_unlock_irqrestore(&ndp->lock, flags);
 
-                       netdev_printk(KERN_DEBUG, dev,
-                                     "Dirty NCSI channel state reset\n");
+                       netdev_dbg(dev, "Dirty NCSI channel state reset\n");
                        ncsi_process_next_channel(ndp);
                        break;
                }
@@ -816,9 +815,9 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp)
                } else {
                        hot_nc = NULL;
                        nc->state = NCSI_CHANNEL_INACTIVE;
-                       netdev_warn(ndp->ndev.dev,
-                                   "NCSI: channel %u link down after config\n",
-                                   nc->id);
+                       netdev_dbg(ndp->ndev.dev,
+                                  "NCSI: channel %u link down after config\n",
+                                  nc->id);
                }
                spin_unlock_irqrestore(&nc->lock, flags);
 
@@ -908,9 +907,9 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp)
        }
 
        ncm = &found->modes[NCSI_MODE_LINK];
-       netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-                     "NCSI: Channel %u added to queue (link %s)\n",
-                     found->id, ncm->data[2] & 0x1 ? "up" : "down");
+       netdev_dbg(ndp->ndev.dev,
+                  "NCSI: Channel %u added to queue (link %s)\n",
+                  found->id, ncm->data[2] & 0x1 ? "up" : "down");
 
 out:
        spin_lock_irqsave(&ndp->lock, flags);
@@ -1199,14 +1198,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp)
        switch (old_state) {
        case NCSI_CHANNEL_INACTIVE:
                ndp->ndev.state = ncsi_dev_state_config;
-               netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n",
-                           nc->id);
+               netdev_dbg(ndp->ndev.dev, "NCSI: configuring channel %u\n",
+                          nc->id);
                ncsi_configure_channel(ndp);
                break;
        case NCSI_CHANNEL_ACTIVE:
                ndp->ndev.state = ncsi_dev_state_suspend;
-               netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n",
-                           nc->id);
+               netdev_dbg(ndp->ndev.dev, "NCSI: suspending channel %u\n",
+                          nc->id);
                ncsi_suspend_channel(ndp);
                break;
        default:
@@ -1226,8 +1225,6 @@ out:
                return ncsi_choose_active_channel(ndp);
        }
 
-       netdev_printk(KERN_DEBUG, ndp->ndev.dev,
-                     "NCSI: No more channels to process\n");
        ncsi_report_link(ndp, false);
        return -ENODEV;
 }
@@ -1318,9 +1315,9 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
                                if ((ndp->ndev.state & 0xff00) ==
                                                ncsi_dev_state_config ||
                                                !list_empty(&nc->link)) {
-                                       netdev_printk(KERN_DEBUG, nd->dev,
-                                                     "NCSI: channel %p marked dirty\n",
-                                                     nc);
+                                       netdev_dbg(nd->dev,
+                                                  "NCSI: channel %p marked dirty\n",
+                                                  nc);
                                        nc->reconfigure_needed = true;
                                }
                                spin_unlock_irqrestore(&nc->lock, flags);
@@ -1338,8 +1335,7 @@ static int ncsi_kick_channels(struct ncsi_dev_priv *ndp)
                        list_add_tail_rcu(&nc->link, &ndp->channel_queue);
                        spin_unlock_irqrestore(&ndp->lock, flags);
 
-                       netdev_printk(KERN_DEBUG, nd->dev,
-                                     "NCSI: kicked channel %p\n", nc);
+                       netdev_dbg(nd->dev, "NCSI: kicked channel %p\n", nc);
                        n++;
                }
        }
@@ -1370,8 +1366,8 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
        list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) {
                n_vids++;
                if (vlan->vid == vid) {
-                       netdev_printk(KERN_DEBUG, dev,
-                                     "NCSI: vid %u already registered\n", vid);
+                       netdev_dbg(dev, "NCSI: vid %u already registered\n",
+                                  vid);
                        return 0;
                }
        }
@@ -1390,7 +1386,7 @@ int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
        vlan->vid = vid;
        list_add_rcu(&vlan->list, &ndp->vlan_vids);
 
-       netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid);
+       netdev_dbg(dev, "NCSI: Added new vid %u\n", vid);
 
        found = ncsi_kick_channels(ndp) != 0;
 
@@ -1419,8 +1415,7 @@ int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
        /* Remove the VLAN id from our internal list */
        list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list)
                if (vlan->vid == vid) {
-                       netdev_printk(KERN_DEBUG, dev,
-                                     "NCSI: vid %u found, removing\n", vid);
+                       netdev_dbg(dev, "NCSI: vid %u found, removing\n", vid);
                        list_del_rcu(&vlan->list);
                        found = true;
                        kfree(vlan);
@@ -1547,7 +1542,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd)
                }
        }
 
-       netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n");
+       netdev_dbg(ndp->ndev.dev, "NCSI: Stopping device\n");
        ncsi_report_link(ndp, true);
 }
 EXPORT_SYMBOL_GPL(ncsi_stop_dev);
index 8527cfd..20d7d36 100644 (file)
@@ -415,7 +415,8 @@ static void tcf_ife_cleanup(struct tc_action *a)
        spin_unlock_bh(&ife->tcf_lock);
 
        p = rcu_dereference_protected(ife->params, 1);
-       kfree_rcu(p, rcu);
+       if (p)
+               kfree_rcu(p, rcu);
 }
 
 /* under ife->tcf_lock for existing action */
@@ -516,8 +517,6 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                        saddr = nla_data(tb[TCA_IFE_SMAC]);
        }
 
-       ife->tcf_action = parm->action;
-
        if (parm->flags & IFE_ENCODE) {
                if (daddr)
                        ether_addr_copy(p->eth_dst, daddr);
@@ -543,10 +542,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla,
                                       NULL, NULL);
                if (err) {
 metadata_parse_err:
-                       if (exists)
-                               tcf_idr_release(*a, bind);
                        if (ret == ACT_P_CREATED)
-                               _tcf_ife_cleanup(*a);
+                               tcf_idr_release(*a, bind);
 
                        if (exists)
                                spin_unlock_bh(&ife->tcf_lock);
@@ -567,7 +564,7 @@ metadata_parse_err:
                err = use_all_metadata(ife);
                if (err) {
                        if (ret == ACT_P_CREATED)
-                               _tcf_ife_cleanup(*a);
+                               tcf_idr_release(*a, bind);
 
                        if (exists)
                                spin_unlock_bh(&ife->tcf_lock);
@@ -576,6 +573,7 @@ metadata_parse_err:
                }
        }
 
+       ife->tcf_action = parm->action;
        if (exists)
                spin_unlock_bh(&ife->tcf_lock);
 
index c98a61e..9c4c2bb 100644 (file)
@@ -21,7 +21,7 @@ static int blackhole_enqueue(struct sk_buff *skb, struct Qdisc *sch,
                             struct sk_buff **to_free)
 {
        qdisc_drop(skb, sch, to_free);
-       return NET_XMIT_SUCCESS;
+       return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
 }
 
 static struct sk_buff *blackhole_dequeue(struct Qdisc *sch)
index 3c85af0..3fabf9f 100644 (file)
@@ -987,8 +987,6 @@ bool xprt_prepare_transmit(struct rpc_task *task)
                task->tk_status = -EAGAIN;
                goto out_unlock;
        }
-       if (!bc_prealloc(req) && !req->rq_xmit_bytes_sent)
-               req->rq_xid = xprt_alloc_xid(xprt);
        ret = true;
 out_unlock:
        spin_unlock_bh(&xprt->transport_lock);
@@ -1298,7 +1296,12 @@ void xprt_retry_reserve(struct rpc_task *task)
 
 static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
 {
-       return (__force __be32)xprt->xid++;
+       __be32 xid;
+
+       spin_lock(&xprt->reserve_lock);
+       xid = (__force __be32)xprt->xid++;
+       spin_unlock(&xprt->reserve_lock);
+       return xid;
 }
 
 static inline void xprt_init_xid(struct rpc_xprt *xprt)
@@ -1316,6 +1319,7 @@ void xprt_request_init(struct rpc_task *task)
        req->rq_task    = task;
        req->rq_xprt    = xprt;
        req->rq_buffer  = NULL;
+       req->rq_xid     = xprt_alloc_xid(xprt);
        req->rq_connect_cookie = xprt->connect_cookie - 1;
        req->rq_bytes_sent = 0;
        req->rq_snd_buf.len = 0;
index 36919a2..3b3410a 100644 (file)
@@ -118,6 +118,9 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
        u64 addr;
        int err;
 
+       if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index)
+               return -EINVAL;
+
        if (!xskq_peek_addr(xs->umem->fq, &addr) ||
            len > xs->umem->chunk_size_nohr) {
                xs->rx_dropped++;
index 34d9e9c..e7889f4 100644 (file)
@@ -239,6 +239,7 @@ cmd_record_mcount =                                         \
             "$(CC_FLAGS_FTRACE)" ]; then                       \
                $(sub_cmd_record_mcount)                        \
        fi;
+endif # -record-mcount
 endif # CONFIG_FTRACE_MCOUNT_RECORD
 
 ifdef CONFIG_STACK_VALIDATION
@@ -263,7 +264,6 @@ ifneq ($(RETPOLINE_CFLAGS),)
   objtool_args += --retpoline
 endif
 endif
-endif
 
 
 ifdef CONFIG_MODVERSIONS
index ac6b1a1..b76b77d 100644 (file)
@@ -29,9 +29,10 @@ static bool has_perf_query_support(void)
        if (perf_query_supported)
                goto out;
 
-       fd = open(bin_name, O_RDONLY);
+       fd = open("/", O_RDONLY);
        if (fd < 0) {
-               p_err("perf_query_support: %s", strerror(errno));
+               p_err("perf_query_support: cannot open directory \"/\" (%s)",
+                     strerror(errno));
                goto out;
        }
 
index a4f4352..05f42a4 100644 (file)
@@ -90,7 +90,9 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
        }
 
        wallclock_secs = (real_time_ts.tv_sec - boot_time_ts.tv_sec) +
-               nsecs / 1000000000;
+               (real_time_ts.tv_nsec - boot_time_ts.tv_nsec + nsecs) /
+               1000000000;
+
 
        if (!localtime_r(&wallclock_secs, &load_tm)) {
                snprintf(buf, size, "%llu", nsecs / 1000000000);
index ca9ef70..d39e4ff 100644 (file)
@@ -56,7 +56,7 @@ name as necessary to disambiguate it from others is necessary.  Note that option
 .PP
 \fB--hide column\fP do not show the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--hide sysfs" to hide the sysfs statistics columns as a group.
 .PP
-\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default.  Currently the only built-in counters disabled by default are "usec" and "Time_Of_Day_Seconds".
+\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default.  Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
 The column name "all" can be used to enable all disabled-by-default built-in counters.
 .PP
 \fB--show column\fP show only the specified built-in columns.  May be invoked multiple times, or with a comma-separated list of column names.  Use "--show sysfs" to show the sysfs statistics columns as a group.
index d6cff30..4d14bbb 100644 (file)
@@ -109,6 +109,7 @@ unsigned int has_hwp_activity_window;       /* IA32_HWP_REQUEST[bits 41:32] */
 unsigned int has_hwp_epp;              /* IA32_HWP_REQUEST[bits 31:24] */
 unsigned int has_hwp_pkg;              /* IA32_HWP_REQUEST_PKG */
 unsigned int has_misc_feature_control;
+unsigned int first_counter_read = 1;
 
 #define RAPL_PKG               (1 << 0)
                                        /* 0x610 MSR_PKG_POWER_LIMIT */
@@ -170,6 +171,8 @@ struct thread_data {
        unsigned long long  irq_count;
        unsigned int smi_count;
        unsigned int cpu_id;
+       unsigned int apic_id;
+       unsigned int x2apic_id;
        unsigned int flags;
 #define CPU_IS_FIRST_THREAD_IN_CORE    0x2
 #define CPU_IS_FIRST_CORE_IN_PACKAGE   0x4
@@ -381,19 +384,23 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
 }
 
 /*
- * Each string in this array is compared in --show and --hide cmdline.
- * Thus, strings that are proper sub-sets must follow their more specific peers.
+ * This list matches the column headers, except
+ * 1. built-in only, the sysfs counters are not here -- we learn of those at run-time
+ * 2. Core and CPU are moved to the end, we can't have strings that contain them
+ *    matching on them for --show and --hide.
  */
 struct msr_counter bic[] = {
        { 0x0, "usec" },
        { 0x0, "Time_Of_Day_Seconds" },
        { 0x0, "Package" },
+       { 0x0, "Node" },
        { 0x0, "Avg_MHz" },
+       { 0x0, "Busy%" },
        { 0x0, "Bzy_MHz" },
        { 0x0, "TSC_MHz" },
        { 0x0, "IRQ" },
        { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
-       { 0x0, "Busy%" },
+       { 0x0, "sysfs" },
        { 0x0, "CPU%c1" },
        { 0x0, "CPU%c3" },
        { 0x0, "CPU%c6" },
@@ -424,73 +431,73 @@ struct msr_counter bic[] = {
        { 0x0, "Cor_J" },
        { 0x0, "GFX_J" },
        { 0x0, "RAM_J" },
-       { 0x0, "Core" },
-       { 0x0, "CPU" },
        { 0x0, "Mod%c6" },
-       { 0x0, "sysfs" },
        { 0x0, "Totl%C0" },
        { 0x0, "Any%C0" },
        { 0x0, "GFX%C0" },
        { 0x0, "CPUGFX%" },
-       { 0x0, "Node%" },
+       { 0x0, "Core" },
+       { 0x0, "CPU" },
+       { 0x0, "APIC" },
+       { 0x0, "X2APIC" },
 };
 
-
-
 #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
 #define        BIC_USEC        (1ULL << 0)
 #define        BIC_TOD         (1ULL << 1)
 #define        BIC_Package     (1ULL << 2)
-#define        BIC_Avg_MHz     (1ULL << 3)
-#define        BIC_Bzy_MHz     (1ULL << 4)
-#define        BIC_TSC_MHz     (1ULL << 5)
-#define        BIC_IRQ         (1ULL << 6)
-#define        BIC_SMI         (1ULL << 7)
-#define        BIC_Busy        (1ULL << 8)
-#define        BIC_CPU_c1      (1ULL << 9)
-#define        BIC_CPU_c3      (1ULL << 10)
-#define        BIC_CPU_c6      (1ULL << 11)
-#define        BIC_CPU_c7      (1ULL << 12)
-#define        BIC_ThreadC     (1ULL << 13)
-#define        BIC_CoreTmp     (1ULL << 14)
-#define        BIC_CoreCnt     (1ULL << 15)
-#define        BIC_PkgTmp      (1ULL << 16)
-#define        BIC_GFX_rc6     (1ULL << 17)
-#define        BIC_GFXMHz      (1ULL << 18)
-#define        BIC_Pkgpc2      (1ULL << 19)
-#define        BIC_Pkgpc3      (1ULL << 20)
-#define        BIC_Pkgpc6      (1ULL << 21)
-#define        BIC_Pkgpc7      (1ULL << 22)
-#define        BIC_Pkgpc8      (1ULL << 23)
-#define        BIC_Pkgpc9      (1ULL << 24)
-#define        BIC_Pkgpc10     (1ULL << 25)
-#define BIC_CPU_LPI    (1ULL << 26)
-#define BIC_SYS_LPI    (1ULL << 27)
-#define        BIC_PkgWatt     (1ULL << 26)
-#define        BIC_CorWatt     (1ULL << 27)
-#define        BIC_GFXWatt     (1ULL << 28)
-#define        BIC_PkgCnt      (1ULL << 29)
-#define        BIC_RAMWatt     (1ULL << 30)
-#define        BIC_PKG__       (1ULL << 31)
-#define        BIC_RAM__       (1ULL << 32)
-#define        BIC_Pkg_J       (1ULL << 33)
-#define        BIC_Cor_J       (1ULL << 34)
-#define        BIC_GFX_J       (1ULL << 35)
-#define        BIC_RAM_J       (1ULL << 36)
-#define        BIC_Core        (1ULL << 37)
-#define        BIC_CPU         (1ULL << 38)
-#define        BIC_Mod_c6      (1ULL << 39)
-#define        BIC_sysfs       (1ULL << 40)
-#define        BIC_Totl_c0     (1ULL << 41)
-#define        BIC_Any_c0      (1ULL << 42)
-#define        BIC_GFX_c0      (1ULL << 43)
-#define        BIC_CPUGFX      (1ULL << 44)
-#define        BIC_Node        (1ULL << 45)
-
-#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD)
+#define        BIC_Node        (1ULL << 3)
+#define        BIC_Avg_MHz     (1ULL << 4)
+#define        BIC_Busy        (1ULL << 5)
+#define        BIC_Bzy_MHz     (1ULL << 6)
+#define        BIC_TSC_MHz     (1ULL << 7)
+#define        BIC_IRQ         (1ULL << 8)
+#define        BIC_SMI         (1ULL << 9)
+#define        BIC_sysfs       (1ULL << 10)
+#define        BIC_CPU_c1      (1ULL << 11)
+#define        BIC_CPU_c3      (1ULL << 12)
+#define        BIC_CPU_c6      (1ULL << 13)
+#define        BIC_CPU_c7      (1ULL << 14)
+#define        BIC_ThreadC     (1ULL << 15)
+#define        BIC_CoreTmp     (1ULL << 16)
+#define        BIC_CoreCnt     (1ULL << 17)
+#define        BIC_PkgTmp      (1ULL << 18)
+#define        BIC_GFX_rc6     (1ULL << 19)
+#define        BIC_GFXMHz      (1ULL << 20)
+#define        BIC_Pkgpc2      (1ULL << 21)
+#define        BIC_Pkgpc3      (1ULL << 22)
+#define        BIC_Pkgpc6      (1ULL << 23)
+#define        BIC_Pkgpc7      (1ULL << 24)
+#define        BIC_Pkgpc8      (1ULL << 25)
+#define        BIC_Pkgpc9      (1ULL << 26)
+#define        BIC_Pkgpc10     (1ULL << 27)
+#define BIC_CPU_LPI    (1ULL << 28)
+#define BIC_SYS_LPI    (1ULL << 29)
+#define        BIC_PkgWatt     (1ULL << 30)
+#define        BIC_CorWatt     (1ULL << 31)
+#define        BIC_GFXWatt     (1ULL << 32)
+#define        BIC_PkgCnt      (1ULL << 33)
+#define        BIC_RAMWatt     (1ULL << 34)
+#define        BIC_PKG__       (1ULL << 35)
+#define        BIC_RAM__       (1ULL << 36)
+#define        BIC_Pkg_J       (1ULL << 37)
+#define        BIC_Cor_J       (1ULL << 38)
+#define        BIC_GFX_J       (1ULL << 39)
+#define        BIC_RAM_J       (1ULL << 40)
+#define        BIC_Mod_c6      (1ULL << 41)
+#define        BIC_Totl_c0     (1ULL << 42)
+#define        BIC_Any_c0      (1ULL << 43)
+#define        BIC_GFX_c0      (1ULL << 44)
+#define        BIC_CPUGFX      (1ULL << 45)
+#define        BIC_Core        (1ULL << 46)
+#define        BIC_CPU         (1ULL << 47)
+#define        BIC_APIC        (1ULL << 48)
+#define        BIC_X2APIC      (1ULL << 49)
+
+#define BIC_DISABLED_BY_DEFAULT        (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
 
 unsigned long long bic_enabled = (0xFFFFFFFFFFFFFFFFULL & ~BIC_DISABLED_BY_DEFAULT);
-unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs;
+unsigned long long bic_present = BIC_USEC | BIC_TOD | BIC_sysfs | BIC_APIC | BIC_X2APIC;
 
 #define DO_BIC(COUNTER_NAME) (bic_enabled & bic_present & COUNTER_NAME)
 #define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
@@ -517,17 +524,34 @@ void help(void)
        "when COMMAND completes.\n"
        "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
        "to print statistics, until interrupted.\n"
-       "--add          add a counter\n"
-       "               eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
-       "--cpu  cpu-set limit output to summary plus cpu-set:\n"
-       "               {core | package | j,k,l..m,n-p }\n"
-       "--quiet        skip decoding system configuration header\n"
-       "--interval sec.subsec  Override default 5-second measurement interval\n"
-       "--help         print this help message\n"
-       "--list         list column headers only\n"
-       "--num_iterations num   number of the measurement iterations\n"
-       "--out file     create or truncate \"file\" for all output\n"
-       "--version      print version information\n"
+       "  -a, --add    add a counter\n"
+       "                 eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+       "  -c, --cpu    cpu-set limit output to summary plus cpu-set:\n"
+       "                 {core | package | j,k,l..m,n-p }\n"
+       "  -d, --debug  displays usec, Time_Of_Day_Seconds and more debugging\n"
+       "  -D, --Dump   displays the raw counter values\n"
+       "  -e, --enable [all | column]\n"
+       "               shows all or the specified disabled column\n"
+       "  -H, --hide [column|column,column,...]\n"
+       "               hide the specified column(s)\n"
+       "  -i, --interval sec.subsec\n"
+       "               Override default 5-second measurement interval\n"
+       "  -J, --Joules displays energy in Joules instead of Watts\n"
+       "  -l, --list   list column headers only\n"
+       "  -n, --num_iterations num\n"
+       "               number of the measurement iterations\n"
+       "  -o, --out file\n"
+       "               create or truncate \"file\" for all output\n"
+       "  -q, --quiet  skip decoding system configuration header\n"
+       "  -s, --show [column|column,column,...]\n"
+       "               show only the specified column(s)\n"
+       "  -S, --Summary\n"
+       "               limits output to 1-line system summary per interval\n"
+       "  -T, --TCC temperature\n"
+       "               sets the Thermal Control Circuit temperature in\n"
+       "                 degrees Celsius\n"
+       "  -h, --help   print this help message\n"
+       "  -v, --version        print version information\n"
        "\n"
        "For more help, run \"man turbostat\"\n");
 }
@@ -601,6 +625,10 @@ void print_header(char *delim)
                outp += sprintf(outp, "%sCore", (printed++ ? delim : ""));
        if (DO_BIC(BIC_CPU))
                outp += sprintf(outp, "%sCPU", (printed++ ? delim : ""));
+       if (DO_BIC(BIC_APIC))
+               outp += sprintf(outp, "%sAPIC", (printed++ ? delim : ""));
+       if (DO_BIC(BIC_X2APIC))
+               outp += sprintf(outp, "%sX2APIC", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Avg_MHz))
                outp += sprintf(outp, "%sAvg_MHz", (printed++ ? delim : ""));
        if (DO_BIC(BIC_Busy))
@@ -880,6 +908,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
                if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+               if (DO_BIC(BIC_APIC))
+                       outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
+               if (DO_BIC(BIC_X2APIC))
+                       outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
        } else {
                if (DO_BIC(BIC_Package)) {
                        if (p)
@@ -904,6 +936,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
                }
                if (DO_BIC(BIC_CPU))
                        outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->cpu_id);
+               if (DO_BIC(BIC_APIC))
+                       outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->apic_id);
+               if (DO_BIC(BIC_X2APIC))
+                       outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), t->x2apic_id);
        }
 
        if (DO_BIC(BIC_Avg_MHz))
@@ -1231,6 +1267,12 @@ delta_thread(struct thread_data *new, struct thread_data *old,
        int i;
        struct msr_counter *mp;
 
+       /* we run cpuid just the 1st time, copy the results */
+       if (DO_BIC(BIC_APIC))
+               new->apic_id = old->apic_id;
+       if (DO_BIC(BIC_X2APIC))
+               new->x2apic_id = old->x2apic_id;
+
        /*
         * the timestamps from start of measurement interval are in "old"
         * the timestamp from end of measurement interval are in "new"
@@ -1393,6 +1435,12 @@ int sum_counters(struct thread_data *t, struct core_data *c,
        int i;
        struct msr_counter *mp;
 
+       /* copy un-changing apic_id's */
+       if (DO_BIC(BIC_APIC))
+               average.threads.apic_id = t->apic_id;
+       if (DO_BIC(BIC_X2APIC))
+               average.threads.x2apic_id = t->x2apic_id;
+
        /* remember first tv_begin */
        if (average.threads.tv_begin.tv_sec == 0)
                average.threads.tv_begin = t->tv_begin;
@@ -1619,6 +1667,34 @@ int get_mp(int cpu, struct msr_counter *mp, unsigned long long *counterp)
        return 0;
 }
 
+void get_apic_id(struct thread_data *t)
+{
+       unsigned int eax, ebx, ecx, edx, max_level;
+
+       eax = ebx = ecx = edx = 0;
+
+       if (!genuine_intel)
+               return;
+
+       __cpuid(0, max_level, ebx, ecx, edx);
+
+       __cpuid(1, eax, ebx, ecx, edx);
+       t->apic_id = (ebx >> 24) & 0xf;
+
+       if (max_level < 0xb)
+               return;
+
+       if (!DO_BIC(BIC_X2APIC))
+               return;
+
+       ecx = 0;
+       __cpuid(0xb, eax, ebx, ecx, edx);
+       t->x2apic_id = edx;
+
+       if (debug && (t->apic_id != t->x2apic_id))
+               fprintf(stderr, "cpu%d: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
+}
+
 /*
  * get_counters(...)
  * migrate to cpu
@@ -1632,7 +1708,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
        struct msr_counter *mp;
        int i;
 
-
        gettimeofday(&t->tv_begin, (struct timezone *)NULL);
 
        if (cpu_migrate(cpu)) {
@@ -1640,6 +1715,8 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
                return -1;
        }
 
+       if (first_counter_read)
+               get_apic_id(t);
 retry:
        t->tsc = rdtsc();       /* we are running on local CPU of interest */
 
@@ -2432,6 +2509,12 @@ void set_node_data(void)
                if (pni[pkg].count > topo.nodes_per_pkg)
                        topo.nodes_per_pkg = pni[0].count;
 
+       /* Fake 1 node per pkg for machines that don't
+        * expose nodes and thus avoid -nan results
+        */
+       if (topo.nodes_per_pkg == 0)
+               topo.nodes_per_pkg = 1;
+
        for (cpu = 0; cpu < topo.num_cpus; cpu++) {
                pkg = cpus[cpu].physical_package_id;
                node = cpus[cpu].physical_node_id;
@@ -2879,6 +2962,7 @@ void do_sleep(void)
        }
 }
 
+
 void turbostat_loop()
 {
        int retval;
@@ -2892,6 +2976,7 @@ restart:
 
        snapshot_proc_sysfs_files();
        retval = for_all_cpus(get_counters, EVEN_COUNTERS);
+       first_counter_read = 0;
        if (retval < -1) {
                exit(retval);
        } else if (retval == -1) {
@@ -4392,7 +4477,7 @@ void process_cpuid()
        if (!quiet) {
                fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
                        max_level, family, model, stepping, family, model, stepping);
-               fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
+               fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
                        ecx & (1 << 0) ? "SSE3" : "-",
                        ecx & (1 << 3) ? "MONITOR" : "-",
                        ecx & (1 << 6) ? "SMX" : "-",
@@ -4401,6 +4486,7 @@ void process_cpuid()
                        edx & (1 << 4) ? "TSC" : "-",
                        edx & (1 << 5) ? "MSR" : "-",
                        edx & (1 << 22) ? "ACPI-TM" : "-",
+                       edx & (1 << 28) ? "HT" : "-",
                        edx & (1 << 29) ? "TM" : "-");
        }
 
@@ -4652,7 +4738,6 @@ void process_cpuid()
        return;
 }
 
-
 /*
  * in /dev/cpu/ return success for names that are numbers
  * ie. filter out ".", "..", "microcode".
@@ -4842,6 +4927,13 @@ void init_counter(struct thread_data *thread_base, struct core_data *core_base,
        struct core_data *c;
        struct pkg_data *p;
 
+
+       /* Workaround for systems where physical_node_id==-1
+        * and logical_node_id==(-1 - topo.num_cpus)
+        */
+       if (node_id < 0)
+               node_id = 0;
+
        t = GET_THREAD(thread_base, thread_id, core_id, node_id, pkg_id);
        c = GET_CORE(core_base, core_id, node_id, pkg_id);
        p = GET_PKG(pkg_base, pkg_id);
@@ -4946,6 +5038,7 @@ int fork_it(char **argv)
 
        snapshot_proc_sysfs_files();
        status = for_all_cpus(get_counters, EVEN_COUNTERS);
+       first_counter_read = 0;
        if (status)
                exit(status);
        /* clear affinity side-effect of get_counters() */
@@ -5009,7 +5102,7 @@ int get_and_dump_counters(void)
 }
 
 void print_version() {
-       fprintf(outf, "turbostat version 18.06.01"
+       fprintf(outf, "turbostat version 18.06.20"
                " - Len Brown <lenb@kernel.org>\n");
 }
 
@@ -5381,7 +5474,7 @@ void cmdline(int argc, char **argv)
                        break;
                case 'e':
                        /* --enable specified counter */
-                       bic_enabled |= bic_lookup(optarg, SHOW_LIST);
+                       bic_enabled = bic_enabled | bic_lookup(optarg, SHOW_LIST);
                        break;
                case 'd':
                        debug++;
@@ -5465,7 +5558,6 @@ void cmdline(int argc, char **argv)
 int main(int argc, char **argv)
 {
        outf = stderr;
-
        cmdline(argc, argv);
 
        if (!quiet)
index 1eefe21..7eb613f 100644 (file)
@@ -7,3 +7,13 @@ CONFIG_CGROUP_BPF=y
 CONFIG_NETDEVSIM=m
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_IPIP=y
+CONFIG_IPV6=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPGRE=y
+CONFIG_IPV6_GRE=y
+CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=m
+CONFIG_CRYPTO_SHA256=m
+CONFIG_VXLAN=y
+CONFIG_GENEVE=y
index e78aad0..be800d0 100755 (executable)
@@ -163,6 +163,10 @@ def bpftool(args, JSON=True, ns="", fail=True):
 
 def bpftool_prog_list(expected=None, ns=""):
     _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+    # Remove the base progs
+    for p in base_progs:
+        if p in progs:
+            progs.remove(p)
     if expected is not None:
         if len(progs) != expected:
             fail(True, "%d BPF programs loaded, expected %d" %
@@ -171,6 +175,10 @@ def bpftool_prog_list(expected=None, ns=""):
 
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+    # Remove the base maps
+    for m in base_maps:
+        if m in maps:
+            maps.remove(m)
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
@@ -585,8 +593,8 @@ skip(os.getuid() != 0, "test must be run as root")
 # Check tools
 ret, progs = bpftool("prog", fail=False)
 skip(ret != 0, "bpftool not installed")
-# Check no BPF programs are loaded
-skip(len(progs) != 0, "BPF programs already loaded on the system")
+base_progs = progs
+_, base_maps = bpftool("map")
 
 # Check netdevsim
 ret, out = cmd("modprobe netdevsim", fail=False)
index aeb2901..546aee3 100755 (executable)
@@ -608,28 +608,26 @@ setup_xfrm_tunnel()
 test_xfrm_tunnel()
 {
        config_device
-        #tcpdump -nei veth1 ip &
-       output=$(mktemp)
-       cat /sys/kernel/debug/tracing/trace_pipe | tee $output &
-        setup_xfrm_tunnel
+       > /sys/kernel/debug/tracing/trace
+       setup_xfrm_tunnel
        tc qdisc add dev veth1 clsact
        tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
                sec xfrm_get_state
        ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
        sleep 1
-       grep "reqid 1" $output
+       grep "reqid 1" /sys/kernel/debug/tracing/trace
        check_err $?
-       grep "spi 0x1" $output
+       grep "spi 0x1" /sys/kernel/debug/tracing/trace
        check_err $?
-       grep "remote ip 0xac100164" $output
+       grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
        check_err $?
        cleanup
 
        if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: xfrm tunnel"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+               echo -e ${RED}"FAIL: xfrm tunnel"${NC}
+               return 1
+       fi
+       echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
 }
 
 attach_bpf()
@@ -657,6 +655,10 @@ cleanup()
        ip link del ip6geneve11 2> /dev/null
        ip link del erspan11 2> /dev/null
        ip link del ip6erspan11 2> /dev/null
+       ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
+       ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
+       ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
+       ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
 }
 
 cleanup_exit()
@@ -668,7 +670,7 @@ cleanup_exit()
 
 check()
 {
-       ip link help $1 2>&1 | grep -q "^Usage:"
+       ip link help 2>&1 | grep -q "\s$1\s"
        if [ $? -ne 0 ];then
                echo "SKIP $1: iproute2 not support"
        cleanup
index 6ccb154..22f8df1 100755 (executable)
@@ -7,13 +7,16 @@
 #
 # Released under the terms of the GPL v2.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./common_tests
 
 if [ -e $REBOOT_FLAG  ]; then
     rm $REBOOT_FLAG
 else
     prlog "pstore_crash_test has not been executed yet. we skip further tests."
-    exit 0
+    exit $ksft_skip
 fi
 
 prlog -n "Mounting pstore filesystem ... "
index 6a9f602..6152523 100644 (file)
@@ -137,6 +137,30 @@ unsigned int yield_mod_cnt, nr_abort;
        "subic. %%" INJECT_ASM_REG ", %%" INJECT_ASM_REG ", 1\n\t" \
        "bne 222b\n\t" \
        "333:\n\t"
+
+#elif defined(__mips__)
+
+#define RSEQ_INJECT_INPUT \
+       , [loop_cnt_1]"m"(loop_cnt[1]) \
+       , [loop_cnt_2]"m"(loop_cnt[2]) \
+       , [loop_cnt_3]"m"(loop_cnt[3]) \
+       , [loop_cnt_4]"m"(loop_cnt[4]) \
+       , [loop_cnt_5]"m"(loop_cnt[5]) \
+       , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "$5"
+
+#define RSEQ_INJECT_CLOBBER \
+       , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+       "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+       "beqz " INJECT_ASM_REG ", 333f\n\t" \
+       "222:\n\t" \
+       "addiu " INJECT_ASM_REG ", -1\n\t" \
+       "bnez " INJECT_ASM_REG ", 222b\n\t" \
+       "333:\n\t"
+
 #else
 #error unsupported target
 #endif
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
new file mode 100644 (file)
index 0000000..7f48ecf
--- /dev/null
@@ -0,0 +1,725 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ *
+ * Based on rseq-arm.h:
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#define RSEQ_SIG       0x53053053
+
+#define rseq_smp_mb()  __asm__ __volatile__ ("sync" ::: "memory")
+#define rseq_smp_rmb() rseq_smp_mb()
+#define rseq_smp_wmb() rseq_smp_mb()
+
+#define rseq_smp_load_acquire(p)                                       \
+__extension__ ({                                                       \
+       __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p);                       \
+       rseq_smp_mb();                                                  \
+       ____p1;                                                         \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep()     rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v)                                   \
+do {                                                                   \
+       rseq_smp_mb();                                                  \
+       RSEQ_WRITE_ONCE(*p, v);                                         \
+} while (0)
+
+#ifdef RSEQ_SKIP_FASTPATH
+#include "rseq-skip.h"
+#else /* !RSEQ_SKIP_FASTPATH */
+
+#if _MIPS_SZLONG == 64
+# define LONG                  ".dword"
+# define LONG_LA               "dla"
+# define LONG_L                        "ld"
+# define LONG_S                        "sd"
+# define LONG_ADDI             "daddiu"
+# define U32_U64_PAD(x)                x
+#elif _MIPS_SZLONG == 32
+# define LONG                  ".word"
+# define LONG_LA               "la"
+# define LONG_L                        "lw"
+# define LONG_S                        "sw"
+# define LONG_ADDI             "addiu"
+# ifdef __BIG_ENDIAN
+#  define U32_U64_PAD(x)       "0x0, " x
+# else
+#  define U32_U64_PAD(x)       x ", 0x0"
+# endif
+#else
+# error unsupported _MIPS_SZLONG
+#endif
+
+#define __RSEQ_ASM_DEFINE_TABLE(version, flags,        start_ip, \
+                               post_commit_offset, abort_ip) \
+               ".pushsection __rseq_table, \"aw\"\n\t" \
+               ".balign 32\n\t" \
+               ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+               ".popsection\n\t"
+
+#define RSEQ_ASM_DEFINE_TABLE(start_ip, post_commit_ip, abort_ip) \
+       __RSEQ_ASM_DEFINE_TABLE(0x0, 0x0, start_ip, \
+                               (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+               RSEQ_INJECT_ASM(1) \
+               LONG_LA " $4, " __rseq_str(cs_label) "\n\t" \
+               LONG_S  " $4, %[" __rseq_str(rseq_cs) "]\n\t" \
+               __rseq_str(label) ":\n\t"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+               RSEQ_INJECT_ASM(2) \
+               "lw  $4, %[" __rseq_str(current_cpu_id) "]\n\t" \
+               "bne $4, %[" __rseq_str(cpu_id) "], " __rseq_str(label) "\n\t"
+
+#define __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+                               abort_label, version, flags, \
+                               start_ip, post_commit_offset, abort_ip) \
+               ".balign 32\n\t" \
+               __rseq_str(table_label) ":\n\t" \
+               ".word " __rseq_str(version) ", " __rseq_str(flags) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(start_ip)) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(post_commit_offset)) "\n\t" \
+               LONG " " U32_U64_PAD(__rseq_str(abort_ip)) "\n\t" \
+               ".word " __rseq_str(RSEQ_SIG) "\n\t" \
+               __rseq_str(label) ":\n\t" \
+               teardown \
+               "b %l[" __rseq_str(abort_label) "]\n\t"
+
+#define RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, abort_label, \
+                             start_ip, post_commit_ip, abort_ip) \
+       __RSEQ_ASM_DEFINE_ABORT(table_label, label, teardown, \
+                               abort_label, 0x0, 0x0, start_ip, \
+                               (post_commit_ip - start_ip), abort_ip)
+
+#define RSEQ_ASM_DEFINE_CMPFAIL(label, teardown, cmpfail_label) \
+               __rseq_str(label) ":\n\t" \
+               teardown \
+               "b %l[" __rseq_str(cmpfail_label) "]\n\t"
+
+#define rseq_workaround_gcc_asm_size_guess()   __asm__ __volatile__("")
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[error2]\n\t"
+#endif
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(5)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
+                              off_t voffp, intptr_t *load, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "beq $4, %[expectnot], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+               LONG_L " $4, %[v]\n\t"
+               "beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+               LONG_S " $4, %[load]\n\t"
+               LONG_ADDI " $4, %[voffp]\n\t"
+               LONG_L " $4, 0($4)\n\t"
+               /* final store */
+               LONG_S " $4, %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(5)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expectnot]           "r" (expectnot),
+                 [voffp]               "Ir" (voffp),
+                 [load]                "m" (*load)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(intptr_t *v, intptr_t count, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+               LONG_L " $4, %[v]\n\t"
+               LONG_ADDI " $4, %[count]\n\t"
+               /* final store */
+               LONG_S " $4, %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(4)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 [v]                   "m" (*v),
+                 [count]               "Ir" (count)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
+                                intptr_t *v2, intptr_t newv2,
+                                intptr_t newv, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[error2]\n\t"
+#endif
+               /* try store */
+               LONG_S " %[newv2], %[v2]\n\t"
+               RSEQ_INJECT_ASM(5)
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(6)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* try store input */
+                 [v2]                  "m" (*v2),
+                 [newv2]               "r" (newv2),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
+                                        intptr_t *v2, intptr_t newv2,
+                                        intptr_t newv, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[error2]\n\t"
+#endif
+               /* try store */
+               LONG_S " %[newv2], %[v2]\n\t"
+               RSEQ_INJECT_ASM(5)
+               "sync\n\t"      /* full sync provides store-release */
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(6)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* try store input */
+                 [v2]                  "m" (*v2),
+                 [newv2]               "r" (newv2),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
+                             intptr_t *v2, intptr_t expect2,
+                             intptr_t newv, int cpu)
+{
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(4)
+               LONG_L " $4, %[v2]\n\t"
+               "bne $4, %[expect2], %l[cmpfail]\n\t"
+               RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], %l[error2]\n\t"
+               LONG_L " $4, %[v2]\n\t"
+               "bne $4, %[expect2], %l[error3]\n\t"
+#endif
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(6)
+               "b 5f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+               "5:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* cmp2 input */
+                 [v2]                  "m" (*v2),
+                 [expect2]             "r" (expect2),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv)
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2, error3
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_bug("1st expected value comparison failed");
+error3:
+       rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
+                                void *dst, void *src, size_t len,
+                                intptr_t newv, int cpu)
+{
+       uintptr_t rseq_scratch[3];
+
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               LONG_S " %[src], %[rseq_scratch0]\n\t"
+               LONG_S "  %[dst], %[rseq_scratch1]\n\t"
+               LONG_S " %[len], %[rseq_scratch2]\n\t"
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], 5f\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], 7f\n\t"
+#endif
+               /* try memcpy */
+               "beqz %[len], 333f\n\t" \
+               "222:\n\t" \
+               "lb   $4, 0(%[src])\n\t" \
+               "sb   $4, 0(%[dst])\n\t" \
+               LONG_ADDI " %[src], 1\n\t" \
+               LONG_ADDI " %[dst], 1\n\t" \
+               LONG_ADDI " %[len], -1\n\t" \
+               "bnez %[len], 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(5)
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(6)
+               /* teardown */
+               LONG_L " %[len], %[rseq_scratch2]\n\t"
+               LONG_L " %[dst], %[rseq_scratch1]\n\t"
+               LONG_L " %[src], %[rseq_scratch0]\n\t"
+               "b 8f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4,
+                                     /* teardown */
+                                     LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                     LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                     LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                     abort, 1b, 2b, 4f)
+               RSEQ_ASM_DEFINE_CMPFAIL(5,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_DEFINE_CMPFAIL(6,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       error1)
+               RSEQ_ASM_DEFINE_CMPFAIL(7,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       error2)
+#endif
+               "8:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv),
+                 /* try memcpy input */
+                 [dst]                 "r" (dst),
+                 [src]                 "r" (src),
+                 [len]                 "r" (len),
+                 [rseq_scratch0]       "m" (rseq_scratch[0]),
+                 [rseq_scratch1]       "m" (rseq_scratch[1]),
+                 [rseq_scratch2]       "m" (rseq_scratch[2])
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_workaround_gcc_asm_size_guess();
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_workaround_gcc_asm_size_guess();
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
+                                        void *dst, void *src, size_t len,
+                                        intptr_t newv, int cpu)
+{
+       uintptr_t rseq_scratch[3];
+
+       RSEQ_INJECT_C(9)
+
+       rseq_workaround_gcc_asm_size_guess();
+       __asm__ __volatile__ goto (
+               RSEQ_ASM_DEFINE_TABLE(1f, 2f, 4f) /* start, commit, abort */
+               LONG_S " %[src], %[rseq_scratch0]\n\t"
+               LONG_S " %[dst], %[rseq_scratch1]\n\t"
+               LONG_S " %[len], %[rseq_scratch2]\n\t"
+               /* Start rseq by storing table entry pointer into rseq_cs. */
+               RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+               RSEQ_INJECT_ASM(3)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], 5f\n\t"
+               RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+               LONG_L " $4, %[v]\n\t"
+               "bne $4, %[expect], 7f\n\t"
+#endif
+               /* try memcpy */
+               "beqz %[len], 333f\n\t" \
+               "222:\n\t" \
+               "lb   $4, 0(%[src])\n\t" \
+               "sb   $4, 0(%[dst])\n\t" \
+               LONG_ADDI " %[src], 1\n\t" \
+               LONG_ADDI " %[dst], 1\n\t" \
+               LONG_ADDI " %[len], -1\n\t" \
+               "bnez %[len], 222b\n\t" \
+               "333:\n\t" \
+               RSEQ_INJECT_ASM(5)
+               "sync\n\t"      /* full sync provides store-release */
+               /* final store */
+               LONG_S " %[newv], %[v]\n\t"
+               "2:\n\t"
+               RSEQ_INJECT_ASM(6)
+               /* teardown */
+               LONG_L " %[len], %[rseq_scratch2]\n\t"
+               LONG_L " %[dst], %[rseq_scratch1]\n\t"
+               LONG_L " %[src], %[rseq_scratch0]\n\t"
+               "b 8f\n\t"
+               RSEQ_ASM_DEFINE_ABORT(3, 4,
+                                     /* teardown */
+                                     LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                     LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                     LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                     abort, 1b, 2b, 4f)
+               RSEQ_ASM_DEFINE_CMPFAIL(5,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+               RSEQ_ASM_DEFINE_CMPFAIL(6,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       error1)
+               RSEQ_ASM_DEFINE_CMPFAIL(7,
+                                       /* teardown */
+                                       LONG_L " %[len], %[rseq_scratch2]\n\t"
+                                       LONG_L " %[dst], %[rseq_scratch1]\n\t"
+                                       LONG_L " %[src], %[rseq_scratch0]\n\t",
+                                       error2)
+#endif
+               "8:\n\t"
+               : /* gcc asm goto does not allow outputs */
+               : [cpu_id]              "r" (cpu),
+                 [current_cpu_id]      "m" (__rseq_abi.cpu_id),
+                 [rseq_cs]             "m" (__rseq_abi.rseq_cs),
+                 /* final store input */
+                 [v]                   "m" (*v),
+                 [expect]              "r" (expect),
+                 [newv]                "r" (newv),
+                 /* try memcpy input */
+                 [dst]                 "r" (dst),
+                 [src]                 "r" (src),
+                 [len]                 "r" (len),
+                 [rseq_scratch0]       "m" (rseq_scratch[0]),
+                 [rseq_scratch1]       "m" (rseq_scratch[1]),
+                 [rseq_scratch2]       "m" (rseq_scratch[2])
+                 RSEQ_INJECT_INPUT
+               : "$4", "memory"
+                 RSEQ_INJECT_CLOBBER
+               : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+                 , error1, error2
+#endif
+       );
+       rseq_workaround_gcc_asm_size_guess();
+       return 0;
+abort:
+       rseq_workaround_gcc_asm_size_guess();
+       RSEQ_INJECT_FAILED
+       return -1;
+cmpfail:
+       rseq_workaround_gcc_asm_size_guess();
+       return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+       rseq_workaround_gcc_asm_size_guess();
+       rseq_bug("cpu_id comparison failed");
+error2:
+       rseq_workaround_gcc_asm_size_guess();
+       rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* !RSEQ_SKIP_FASTPATH */
index 0a80857..a468411 100644 (file)
@@ -73,6 +73,8 @@ extern __thread volatile struct rseq __rseq_abi;
 #include <rseq-arm.h>
 #elif defined(__PPC__)
 #include <rseq-ppc.h>
+#elif defined(__mips__)
+#include <rseq-mips.h>
 #else
 #error unsupported target
 #endif
index 2082eef..a19531d 100644 (file)
@@ -1,7 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/x86_64/x86/)
+
+ifneq ($(ARCH),sparc64)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
 SUBDIRS := drivers
 
 TEST_PROGS := run.sh
 
+
 .PHONY: all clean
 
 include ../lib.mk
@@ -18,10 +29,6 @@ all:
                fi \
        done
 
-override define RUN_TESTS
-       @cd $(OUTPUT); ./run.sh
-endef
-
 override define INSTALL_RULE
        mkdir -p $(INSTALL_PATH)
        install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
@@ -33,10 +40,6 @@ override define INSTALL_RULE
        done;
 endef
 
-override define EMIT_TESTS
-       echo "./run.sh"
-endef
-
 override define CLEAN
        @for DIR in $(SUBDIRS); do              \
                BUILD_TARGET=$(OUTPUT)/$$DIR;   \
@@ -44,3 +47,4 @@ override define CLEAN
                make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
        done
 endef
+endif
index 6264f40..deb0df4 100644 (file)
@@ -1,4 +1,4 @@
-
+# SPDX-License-Identifier: GPL-2.0
 INCLUDEDIR := -I.
 CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
 
index 24cff49..fc9f8cd 100755 (executable)
@@ -2,6 +2,19 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs static keys kernel module tests
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_static_key_base; then
+       echo "static_key: module test_static_key_base is not found [SKIP]"
+       exit $ksft_skip
+fi
+
+if ! /sbin/modprobe -q -n test_static_keys; then
+       echo "static_key: module test_static_keys is not found [SKIP]"
+       exit $ksft_skip
+fi
+
 if /sbin/modprobe -q test_static_key_base; then
        if /sbin/modprobe -q test_static_keys; then
                echo "static_key: ok"
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
new file mode 100644 (file)
index 0000000..1ab7e81
--- /dev/null
@@ -0,0 +1,4 @@
+CONFIG_STAGING=y
+CONFIG_ANDROID=y
+CONFIG_SYNC=y
+CONFIG_SW_SYNC=y
index ec232c3..584eb8e 100755 (executable)
@@ -14,6 +14,9 @@
 
 # This performs a series tests against the proc sysctl interface.
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 TEST_NAME="sysctl"
 TEST_DRIVER="test_${TEST_NAME}"
 TEST_DIR=$(dirname $0)
@@ -41,7 +44,7 @@ test_modprobe()
                echo "$0: $DIR not present" >&2
                echo "You must have the following enabled in your kernel:" >&2
                cat $TEST_DIR/config >&2
-               exit 1
+               exit $ksft_skip
        fi
 }
 
@@ -98,28 +101,30 @@ test_reqs()
        uid=$(id -u)
        if [ $uid -ne 0 ]; then
                echo $msg must be run as root >&2
-               exit 0
+               exit $ksft_skip
        fi
 
        if ! which perl 2> /dev/null > /dev/null; then
                echo "$0: You need perl installed"
-               exit 1
+               exit $ksft_skip
        fi
        if ! which getconf 2> /dev/null > /dev/null; then
                echo "$0: You need getconf installed"
-               exit 1
+               exit $ksft_skip
        fi
        if ! which diff 2> /dev/null > /dev/null; then
                echo "$0: You need diff installed"
-               exit 1
+               exit $ksft_skip
        fi
 }
 
 function load_req_mod()
 {
-       trap "test_modprobe" EXIT
-
        if [ ! -d $DIR ]; then
+               if ! modprobe -q -n $TEST_DRIVER; then
+                       echo "$0: module $TEST_DRIVER not found [SKIP]"
+                       exit $ksft_skip
+               fi
                modprobe $TEST_DRIVER
                if [ $? -ne 0 ]; then
                        exit
@@ -765,6 +770,7 @@ function parse_args()
 test_reqs
 allow_user_defaults
 check_production_sysctl_writes_strict
+test_modprobe
 load_req_mod
 
 trap "test_finish" EXIT
index d60506f..f9b31a5 100755 (executable)
@@ -2,6 +2,13 @@
 # SPDX-License-Identifier: GPL-2.0
 # Runs copy_to/from_user infrastructure using test_user_copy kernel module
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if ! /sbin/modprobe -q -n test_user_copy; then
+       echo "user: module test_user_copy is not found [SKIP]"
+       exit $ksft_skip
+fi
 if /sbin/modprobe -q test_user_copy; then
        /sbin/modprobe -q -r test_user_copy
        echo "user_copy: ok"
index 1097f04..bcec712 100644 (file)
@@ -16,6 +16,8 @@
 #include <unistd.h>
 #include <string.h>
 
+#include "../kselftest.h"
+
 #define MAP_SIZE 1048576
 
 struct map_list {
@@ -169,7 +171,7 @@ int main(int argc, char **argv)
                printf("Either the sysctl compact_unevictable_allowed is not\n"
                       "set to 1 or couldn't read the proc file.\n"
                       "Skipping the test\n");
-               return 0;
+               return KSFT_SKIP;
        }
 
        lim.rlim_cur = RLIM_INFINITY;
index 4997b92..637b6d0 100644 (file)
@@ -9,6 +9,8 @@
 #include <stdbool.h>
 #include "mlock2.h"
 
+#include "../kselftest.h"
+
 struct vm_boundaries {
        unsigned long start;
        unsigned long end;
@@ -303,7 +305,7 @@ static int test_mlock_lock()
        if (mlock2_(map, 2 * page_size, 0)) {
                if (errno == ENOSYS) {
                        printf("Cannot call new mlock family, skipping test\n");
-                       _exit(0);
+                       _exit(KSFT_SKIP);
                }
                perror("mlock2(0)");
                goto unmap;
@@ -412,7 +414,7 @@ static int test_mlock_onfault()
        if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
                if (errno == ENOSYS) {
                        printf("Cannot call new mlock family, skipping test\n");
-                       _exit(0);
+                       _exit(KSFT_SKIP);
                }
                perror("mlock2(MLOCK_ONFAULT)");
                goto unmap;
@@ -425,7 +427,7 @@ static int test_mlock_onfault()
        if (munlock(map, 2 * page_size)) {
                if (errno == ENOSYS) {
                        printf("Cannot call new mlock family, skipping test\n");
-                       _exit(0);
+                       _exit(KSFT_SKIP);
                }
                perror("munlock()");
                goto unmap;
@@ -457,7 +459,7 @@ static int test_lock_onfault_of_present()
        if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
                if (errno == ENOSYS) {
                        printf("Cannot call new mlock family, skipping test\n");
-                       _exit(0);
+                       _exit(KSFT_SKIP);
                }
                perror("mlock2(MLOCK_ONFAULT)");
                goto unmap;
@@ -583,7 +585,7 @@ static int test_vma_management(bool call_mlock)
        if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
                if (errno == ENOSYS) {
                        printf("Cannot call new mlock family, skipping test\n");
-                       _exit(0);
+                       _exit(KSFT_SKIP);
                }
                perror("mlock(ONFAULT)\n");
                goto out;
index 22d5646..88cbe55 100755 (executable)
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 #please run as root
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 mnt=./huge
 exitcode=0
 
@@ -36,7 +39,7 @@ if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
                echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
                if [ $? -ne 0 ]; then
                        echo "Please run this test as root"
-                       exit 1
+                       exit $ksft_skip
                fi
                while read name size unit; do
                        if [ "$name" = "HugePages_Free:" ]; then
index de2f9ec..7b8171e 100644 (file)
@@ -69,6 +69,8 @@
 #include <setjmp.h>
 #include <stdbool.h>
 
+#include "../kselftest.h"
+
 #ifdef __NR_userfaultfd
 
 static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
@@ -1322,7 +1324,7 @@ int main(int argc, char **argv)
 int main(void)
 {
        printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
-       return 0;
+       return KSFT_SKIP;
 }
 
 #endif /* __NR_userfaultfd */
index 754de7d..232e958 100755 (executable)
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 TCID="zram.sh"
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 . ./zram_lib.sh
 
 run_zram () {
@@ -24,5 +27,5 @@ elif [ -b /dev/zram0 ]; then
 else
        echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
        echo "$TCID : CONFIG_ZRAM is not set"
-       exit 1
+       exit $ksft_skip
 fi
index f6a9c73..9e73a4f 100755 (executable)
@@ -18,6 +18,9 @@ MODULE=0
 dev_makeswap=-1
 dev_mounted=-1
 
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
 trap INT
 
 check_prereqs()
@@ -27,7 +30,7 @@ check_prereqs()
 
        if [ $uid -ne 0 ]; then
                echo $msg must be run as root >&2
-               exit 0
+               exit $ksft_skip
        fi
 }
 
index 72143cf..ea434dd 100644 (file)
@@ -47,7 +47,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT
 
 config KVM_COMPAT
        def_bool y
-       depends on KVM && COMPAT && !S390
+       depends on KVM && COMPAT && !(S390 || ARM64)
 
 config HAVE_KVM_IRQ_BYPASS
        bool
index 8d90de2..1d90d79 100644 (file)
@@ -297,6 +297,8 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
        phys_addr_t next;
 
        assert_spin_locked(&kvm->mmu_lock);
+       WARN_ON(size & ~PAGE_MASK);
+
        pgd = kvm->arch.pgd + stage2_pgd_index(addr);
        do {
                /*
index ff7dc89..cdce653 100644 (file)
@@ -617,11 +617,6 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
                pr_warn("GICV physical address 0x%llx not page aligned\n",
                        (unsigned long long)info->vcpu.start);
                kvm_vgic_global_state.vcpu_base = 0;
-       } else if (!PAGE_ALIGNED(resource_size(&info->vcpu))) {
-               pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n",
-                       (unsigned long long)resource_size(&info->vcpu),
-                       PAGE_SIZE);
-               kvm_vgic_global_state.vcpu_base = 0;
        } else {
                kvm_vgic_global_state.vcpu_base = info->vcpu.start;
                kvm_vgic_global_state.can_emulate_gicv2 = true;
index ada21f4..8b47507 100644 (file)
@@ -116,6 +116,11 @@ static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
 #ifdef CONFIG_KVM_COMPAT
 static long kvm_vcpu_compat_ioctl(struct file *file, unsigned int ioctl,
                                  unsigned long arg);
+#define KVM_COMPAT(c)  .compat_ioctl   = (c)
+#else
+static long kvm_no_compat_ioctl(struct file *file, unsigned int ioctl,
+                               unsigned long arg) { return -EINVAL; }
+#define KVM_COMPAT(c)  .compat_ioctl   = kvm_no_compat_ioctl
 #endif
 static int hardware_enable_all(void);
 static void hardware_disable_all(void);
@@ -2396,11 +2401,9 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp)
 static struct file_operations kvm_vcpu_fops = {
        .release        = kvm_vcpu_release,
        .unlocked_ioctl = kvm_vcpu_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-       .compat_ioctl   = kvm_vcpu_compat_ioctl,
-#endif
        .mmap           = kvm_vcpu_mmap,
        .llseek         = noop_llseek,
+       KVM_COMPAT(kvm_vcpu_compat_ioctl),
 };
 
 /*
@@ -2824,10 +2827,8 @@ static int kvm_device_release(struct inode *inode, struct file *filp)
 
 static const struct file_operations kvm_device_fops = {
        .unlocked_ioctl = kvm_device_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-       .compat_ioctl = kvm_device_ioctl,
-#endif
        .release = kvm_device_release,
+       KVM_COMPAT(kvm_device_ioctl),
 };
 
 struct kvm_device *kvm_device_from_filp(struct file *filp)
@@ -3165,10 +3166,8 @@ static long kvm_vm_compat_ioctl(struct file *filp,
 static struct file_operations kvm_vm_fops = {
        .release        = kvm_vm_release,
        .unlocked_ioctl = kvm_vm_ioctl,
-#ifdef CONFIG_KVM_COMPAT
-       .compat_ioctl   = kvm_vm_compat_ioctl,
-#endif
        .llseek         = noop_llseek,
+       KVM_COMPAT(kvm_vm_compat_ioctl),
 };
 
 static int kvm_dev_ioctl_create_vm(unsigned long type)
@@ -3259,8 +3258,8 @@ out:
 
 static struct file_operations kvm_chardev_ops = {
        .unlocked_ioctl = kvm_dev_ioctl,
-       .compat_ioctl   = kvm_dev_ioctl,
        .llseek         = noop_llseek,
+       KVM_COMPAT(kvm_dev_ioctl),
 };
 
 static struct miscdevice kvm_dev = {